R Markdown

# Load the six-airlines fare dataset from the working directory.
# stringsAsFactors = TRUE is set explicitly: the output recorded below was
# produced on R 3.4.3, where text columns were read as factors by default;
# R >= 4.0 would otherwise read them as character vectors.
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a data viewer, which is only useful in an interactive session,
# so skip it when the script is sourced or knitted.
if (interactive()) {
  View(airlines.df)
}

library(psych)
## Warning: package 'psych' was built under R version 3.4.3

# Per-column summary statistics (n, mean, sd, median, skew, kurtosis, se, ...).
describe(airlines.df)
##                     vars   n    mean      sd  median trimmed     mad   min
## Airline*               1 458    3.01    1.65    2.00    2.89    1.48  1.00
## Aircraft*              2 458    1.67    0.47    2.00    1.71    0.00  1.00
## FlightDuration         3 458    7.58    3.54    7.79    7.57    4.81  1.25
## TravelMonth*           4 458    2.56    1.17    3.00    2.58    1.48  1.00
## IsInternational*       5 458    1.91    0.28    2.00    2.00    0.00  1.00
## SeatsEconomy           6 458  202.31   76.37  185.00  194.64   85.99 78.00
## SeatsPremium           7 458   33.65   13.26   36.00   33.35   11.86  8.00
## PitchEconomy           8 458   31.22    0.66   31.00   31.26    0.00 30.00
## PitchPremium           9 458   37.91    1.31   38.00   38.05    0.00 34.00
## WidthEconomy          10 458   17.84    0.56   18.00   17.81    0.00 17.00
## WidthPremium          11 458   19.47    1.10   19.00   19.53    0.00 17.00
## PriceEconomy          12 458 1327.08  988.27 1242.00 1244.40 1159.39 65.00
## PricePremium          13 458 1845.26 1288.14 1737.00 1799.05 1845.84 86.00
## PriceRelative         14 458    0.49    0.45    0.36    0.42    0.41  0.02
## SeatsTotal            15 458  235.96   85.29  227.00  228.73   90.44 98.00
## PitchDifference       16 458    6.69    1.76    7.00    6.76    0.00  2.00
## WidthDifference       17 458    1.63    1.19    1.00    1.53    0.00  0.00
## PercentPremiumSeats   18 458   14.65    4.84   13.21   14.31    2.68  4.71
##                         max   range  skew kurtosis    se
## Airline*               6.00    5.00  0.61    -0.95  0.08
## Aircraft*              2.00    1.00 -0.72    -1.48  0.02
## FlightDuration        14.66   13.41 -0.07    -1.12  0.17
## TravelMonth*           4.00    3.00 -0.14    -1.46  0.05
## IsInternational*       2.00    1.00 -2.91     6.50  0.01
## SeatsEconomy         389.00  311.00  0.72    -0.36  3.57
## SeatsPremium          66.00   58.00  0.23    -0.46  0.62
## PitchEconomy          33.00    3.00 -0.03    -0.35  0.03
## PitchPremium          40.00    6.00 -1.51     3.52  0.06
## WidthEconomy          19.00    2.00 -0.04    -0.08  0.03
## WidthPremium          21.00    4.00 -0.08    -0.31  0.05
## PriceEconomy        3593.00 3528.00  0.51    -0.88 46.18
## PricePremium        7414.00 7328.00  0.50     0.43 60.19
## PriceRelative          1.89    1.87  1.17     0.72  0.02
## SeatsTotal           441.00  343.00  0.70    -0.53  3.99
## PitchDifference       10.00    8.00 -0.54     1.78  0.08
## WidthDifference        4.00    4.00  0.84    -0.53  0.06
## PercentPremiumSeats   24.69   19.98  0.71     0.28  0.23
# Histograms of the seat-count, pitch, width and price columns for both cabin
# classes. Each entry names the column to plot plus its title and x-axis label;
# keeping them in one table avoids the copy-paste slip that previously labelled
# the PricePremium histogram's x-axis as the Economy price.
hist_specs <- list(
  list(column = "SeatsEconomy",
       main = "Histogram of Seats Economy",
       xlab = "Seats Of Economy Class"),
  list(column = "SeatsPremium",
       main = "Histogram of Seats Premium",
       xlab = "Seats Of Premium Economy Class"),
  list(column = "PitchEconomy",
       main = "Histogram of Pitch Economy",
       xlab = "Pitch Of Economy Class"),
  list(column = "PitchPremium",
       main = "Histogram of Pitch Premium",
       xlab = "Pitch Of PremiumEconomy Class"),
  list(column = "WidthEconomy",
       main = "Histogram of Seats Width Economy",
       xlab = "Seats Width Of Economy Class"),
  list(column = "WidthPremium",
       main = "Histogram of Seats Width Premium",
       xlab = "Seats Width Of Premium Economy Class"),
  list(column = "PriceEconomy",
       main = "Histogram of Price of Tickets In Economy Class",
       xlab = "Price of Tickets In Economy Class"),
  list(column = "PricePremium",
       main = "Histogram of Price of Tickets In PremiumEconomy Class",
       xlab = "Price of Tickets In PremiumEconomy Class")
)

# Draw one histogram per entry, in the order listed above.
for (spec in hist_specs) {
  hist(airlines.df[[spec$column]], main = spec$main, xlab = spec$xlab)
}

# Scatter plots of each Economy attribute against its Premium Economy
# counterpart, laid out two per page.
# par() returns the settings it replaces, so they are saved here and restored
# afterwards instead of assuming the device started with a 1 x 1 layout.
old_par <- par(mfrow = c(1, 2))

# Explicit axis labels replace the default "airlines.df$..." expressions.
plot(x = airlines.df$SeatsEconomy, y = airlines.df$SeatsPremium,
     xlab = "SeatsEconomy", ylab = "SeatsPremium")
plot(x = airlines.df$PitchEconomy, y = airlines.df$PitchPremium,
     xlab = "PitchEconomy", ylab = "PitchPremium")

plot(x = airlines.df$WidthEconomy, y = airlines.df$WidthPremium,
     xlab = "WidthEconomy", ylab = "WidthPremium")
plot(x = airlines.df$PriceEconomy, y = airlines.df$PricePremium,
     xlab = "PriceEconomy", ylab = "PricePremium")

# Put the graphics parameters back the way they were.
par(old_par)

library(corrgram) 
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram of airlines.df, columns kept in their original order:
# shaded cells below the diagonal, pie glyphs above it, and the min/max panel
# on the diagonal.
corrgram(
  airlines.df,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt,
  main = "Corrgram of airlines.df intercorrelations"
)

## Hypotheses : 1. There is no effect of the number of seats, seat width and seat pitch on the ticket price of the Economy and Premium Economy classes.
##              2. There is no effect of the number of seats in Economy class on the number of seats in Premium Economy class.
##              3. There is no relation between the price of tickets in Economy class and the price of tickets in Premium Economy class.

## Hypothesis : There is no relation between the price of tickets (Economy or Premium Economy) and the type of flight (Domestic or International).

# Two-sample t-test (equal variances assumed) comparing the Economy ticket
# price between the two IsInternational groups.
t.test(
  PriceEconomy ~ IsInternational,
  data = airlines.df,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  PriceEconomy by IsInternational
## t = -6.8166, df = 456, p-value = 2.965e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1369.8658  -756.7693
## sample estimates:
##      mean in group Domestic mean in group International 
##                     356.625                    1419.943
# Two-sample t-test (equal variances assumed) comparing the Premium Economy
# ticket price between the two IsInternational groups.
t.test(
  PricePremium ~ IsInternational,
  data = airlines.df,
  var.equal = TRUE
)
## 
##  Two Sample t-test
## 
## data:  PricePremium by IsInternational
## t = -8.0006, df = 456, p-value = 1.033e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1991.772 -1206.246
## sample estimates:
##      mean in group Domestic mean in group International 
##                     385.900                    1984.909
## P-value < 0.05 in both tests. Therefore, the price of tickets in Economy class and the price of tickets in Premium Economy class both depend on the type of flight (Domestic or International).
## Therefore, the null hypothesis is successfully rejected.

## Hypothesis :
## 1. There is no effect of the number of seats, seat width and seat pitch on the ticket price of the Economy and Premium Economy classes.

# Linear model of the Economy ticket price on the Economy seat attributes
# (count, width, pitch) together with flight type, travel month, flight
# duration, aircraft and airline.
Model1 <- PriceEconomy ~ SeatsEconomy + WidthEconomy + PitchEconomy +
  IsInternational + TravelMonth + FlightDuration + Aircraft + Airline
effect <- lm(formula = Model1, data = airlines.df)
summary(effect)
## 
## Call:
## lm(formula = Model1, data = airlines.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2076.00  -232.27    71.06   319.54  1195.57 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   1046.4681  3343.4663   0.313  0.75444    
## SeatsEconomy                    -0.7980     0.3958  -2.016  0.04440 *  
## WidthEconomy                   -42.2485   101.4948  -0.416  0.67742    
## PitchEconomy                     9.8126    88.2097   0.111  0.91148    
## IsInternationalInternational  1329.1036   269.5574   4.931 1.16e-06 ***
## TravelMonthJul                 100.5417    77.5219   1.297  0.19533    
## TravelMonthOct                 -45.8224    65.8492  -0.696  0.48688    
## TravelMonthSep                   0.8848    65.6567   0.013  0.98925    
## FlightDuration                  98.9510     9.5863  10.322  < 2e-16 ***
## AircraftBoeing                 194.6460    68.8440   2.827  0.00491 ** 
## AirlineBritish               -1368.1788   124.1997 -11.016  < 2e-16 ***
## AirlineDelta                  -677.5162   247.7959  -2.734  0.00650 ** 
## AirlineJet                   -2157.0207   176.3881 -12.229  < 2e-16 ***
## AirlineSingapore             -1994.9912   179.3738 -11.122  < 2e-16 ***
## AirlineVirgin                -1149.5624   135.4383  -8.488 3.20e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 524.6 on 443 degrees of freedom
## Multiple R-squared:  0.7269, Adjusted R-squared:  0.7182 
## F-statistic: 84.21 on 14 and 443 DF,  p-value: < 2.2e-16
## Thus : Regression Model :
## PriceEconomy = b0 + b1*SeatsEconomy + b2*WidthEconomy + b3*PitchEconomy + b4*IsInternational + b5*TravelMonth + b6*FlightDuration + b7*Aircraft + b8*Airline + e .
## PriceEconomy = 1046.4681 + (-0.7980)*SeatsEconomy + (-42.2485)*WidthEconomy + 9.8126*PitchEconomy + 1329.1036*IsInternationalInternational + 100.5417*TravelMonthJul + (-45.8224)*TravelMonthOct + 0.8848*TravelMonthSep + 98.9510*FlightDuration + 194.6460*AircraftBoeing + (-1368.1788)*AirlineBritish + (-677.5162)*AirlineDelta + (-2157.0207)*AirlineJet + (-1994.9912)*AirlineSingapore + (-1149.5624)*AirlineVirgin



## The overall F-test p-value is < 0.05. Therefore, the null hypothesis is successfully rejected.
## Multiple R-squared = 72.69 % , so the predictors are strongly related to the Economy ticket price.


##                                Pr(>|t|)    
#SeatsEconomy                     0.04440 *  
#WidthEconomy                     0.67742    
#PitchEconomy                     0.91148    
#IsInternational                  1.16e-06 ***
#TravelMonthJul                   0.19533    
#TravelMonthOct                   0.48688    
#TravelMonthSep                   0.98925    
#FlightDuration                   < 2e-16 ***
#AircraftBoeing                   0.00491 ** 
#AirlineBritish                   < 2e-16 ***
#AirlineDelta                     0.00650 ** 
#AirlineJet                       < 2e-16 ***
#AirlineSingapore                 < 2e-16 ***
#AirlineVirgin                    3.20e-16 ***

## From the individual p-values, the factors significantly related to the price of a ticket in Economy class are: SeatsEconomy, IsInternational, FlightDuration, Aircraft (Boeing) and Airline .

# Linear model of the Premium Economy ticket price on the Premium seat
# attributes (count, width, pitch) together with flight type, travel month,
# flight duration, aircraft and airline.
# Note: this overwrites the Model1 formula assigned earlier in the script.
Model1 <- PricePremium ~ SeatsPremium + WidthPremium + PitchPremium +
  IsInternational + TravelMonth + FlightDuration + Aircraft + Airline
effect1 <- lm(formula = Model1, data = airlines.df)
summary(effect1)
## 
## Call:
## lm(formula = Model1, data = airlines.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2094.8  -364.8    52.3   360.9  4387.4 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -3930.8320  5562.5878  -0.707  0.48015    
## SeatsPremium                     0.2668     4.8987   0.054  0.95658    
## WidthPremium                    57.8484   224.6487   0.258  0.79691    
## PitchPremium                    82.9102   226.5945   0.366  0.71462    
## IsInternationalInternational  1017.7044   722.8915   1.408  0.15988    
## TravelMonthJul                  89.6143   111.7754   0.802  0.42314    
## TravelMonthOct                 -26.7792    94.9716  -0.282  0.77810    
## TravelMonthSep                  -2.1725    94.7113  -0.023  0.98171    
## FlightDuration                 175.8219    14.1080  12.463  < 2e-16 ***
## AircraftBoeing                 261.8917    91.7820   2.853  0.00453 ** 
## AirlineBritish                -983.1179   136.8293  -7.185 2.87e-12 ***
## AirlineDelta                  -343.7183   562.5606  -0.611  0.54152    
## AirlineJet                   -2081.9591   287.7882  -7.234 2.07e-12 ***
## AirlineSingapore             -2173.3836   272.6682  -7.971 1.35e-14 ***
## AirlineVirgin                 -503.6319   475.1096  -1.060  0.28971    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 756.3 on 443 degrees of freedom
## Multiple R-squared:  0.6659, Adjusted R-squared:  0.6553 
## F-statistic: 63.06 on 14 and 443 DF,  p-value: < 2.2e-16
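## The overall F-test p-value is less than 0.05. Hence the null hypothesis is successfully rejected.
## Multiple R-squared:  0.6659 i.e. 66.59 percent . Hence the predictors are strongly related to the Premium Economy ticket price.
## Note: SeatsPremium, WidthPremium and PitchPremium are not individually significant (p > 0.05); the significant terms are FlightDuration, Aircraft and some Airline levels.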

## 2. There is no effect of the number of seats in Economy class on the number of seats in Premium Economy class.


# Simple linear regression of the Premium Economy seat count on the Economy
# seat count.
effect2 <- lm(
  formula = SeatsPremium ~ SeatsEconomy,
  data = airlines.df
)
summary(effect2)
## 
## Call:
## lm(formula = SeatsPremium ~ SeatsEconomy, data = airlines.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.273  -6.645  -2.064  10.424  17.936 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.690683   1.372381   8.519 2.37e-16 ***
## SeatsEconomy  0.108534   0.006347  17.100  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.36 on 456 degrees of freedom
## Multiple R-squared:  0.3907, Adjusted R-squared:  0.3894 
## F-statistic: 292.4 on 1 and 456 DF,  p-value: < 2.2e-16
## Thus : Regression Model :
## SeatsPremium = b0 + b1*SeatsEconomy + e .
## SeatsPremium = 11.690683 + 0.108534*SeatsEconomy
## P-value < 0.05. Therefore, the null hypothesis is successfully rejected.
## Multiple R-squared = 39.07 % , so the variables are moderately related .

# Simple linear regression of the Premium Economy ticket price on the Economy
# ticket price.
effect3 <- lm(
  formula = PricePremium ~ PriceEconomy,
  data = airlines.df
)
summary(effect3)
## 
## Call:
## lm(formula = PricePremium ~ PriceEconomy, data = airlines.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -805.5 -315.6 -111.1  157.5 3483.4 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  286.09096   43.71533   6.544 1.61e-10 ***
## PriceEconomy   1.17489    0.02643  44.452  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 558.4 on 456 degrees of freedom
## Multiple R-squared:  0.8125, Adjusted R-squared:  0.8121 
## F-statistic:  1976 on 1 and 456 DF,  p-value: < 2.2e-16
## Thus : Regression Model :
## PricePremium = b0 + b1*PriceEconomy + e .
## PricePremium = 286.09096 + 1.17489*PriceEconomy
## P-value < 0.05. Therefore, the null hypothesis is successfully rejected.
## Multiple R-squared = 81.25 % , so the variables are strongly related .

## Factors on which the difference between PriceEconomy and PricePremium (modelled here through PriceRelative) depends.

# Linear model of PriceRelative on the Economy seat attributes together with
# flight type, travel month, flight duration, aircraft and airline.
# Note: this overwrites both Model1 and the `effect` fit assigned earlier in
# the script.
Model1 <- PriceRelative ~ SeatsEconomy + WidthEconomy + PitchEconomy +
  IsInternational + TravelMonth + FlightDuration + Aircraft + Airline
effect <- lm(formula = Model1, data = airlines.df)
summary(effect)
## 
## Call:
## lm(formula = Model1, data = airlines.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79257 -0.21656 -0.04377  0.11681  1.47095 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   5.5367478  2.2920593   2.416  0.01611 *  
## SeatsEconomy                  0.0004907  0.0002714   1.808  0.07127 .  
## WidthEconomy                 -0.0874560  0.0695781  -1.257  0.20944    
## PitchEconomy                 -0.1329101  0.0604707  -2.198  0.02847 *  
## IsInternationalInternational  0.0133091  0.1847907   0.072  0.94262    
## TravelMonthJul               -0.0191317  0.0531439  -0.360  0.71902    
## TravelMonthOct                0.0528023  0.0451419   1.170  0.24275    
## TravelMonthSep               -0.0128973  0.0450099  -0.287  0.77460    
## FlightDuration                0.0358711  0.0065717   5.458 8.01e-08 ***
## AircraftBoeing                0.0179156  0.0471949   0.380  0.70442    
## AirlineBritish                0.1705060  0.0851431   2.003  0.04583 *  
## AirlineDelta                  0.0902847  0.1698725   0.531  0.59535    
## AirlineJet                    0.6657522  0.1209200   5.506 6.24e-08 ***
## AirlineSingapore              0.3824173  0.1229668   3.110  0.00199 ** 
## AirlineVirgin                 0.4458910  0.0928475   4.802 2.15e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3596 on 443 degrees of freedom
## Multiple R-squared:  0.3825, Adjusted R-squared:  0.363 
## F-statistic:  19.6 on 14 and 443 DF,  p-value: < 2.2e-16
## According to the individual p-values (< 0.05), the significant factors are :
## PitchEconomy , FlightDuration , AirlineBritish , AirlineJet , AirlineSingapore , AirlineVirgin . 



## Mean ticket price of the Economy and Premium Economy classes .

# Average Economy ticket price across all rows.
mean(airlines.df[["PriceEconomy"]])
## [1] 1327.076
# Average Premium Economy ticket price across all rows.
mean(airlines.df[["PricePremium"]])
## [1] 1845.258