Read data:

# Load the airline data set from the working directory.
# stringsAsFactors = TRUE makes the text columns factors on every R version,
# so summary() reports level counts for them.
air.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)

# View() opens a spreadsheet-style viewer, which is only useful (and only
# reliably available) in an interactive session.
if (interactive()) {
  View(air.df)
}

Getting Summary stats:

# Per-column overview of the data set: min/quartiles/mean/max for numeric
# columns and level counts for factor columns.
summary(air.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69

Converting into factor variables:

# Store the four categorical columns as factors.
air.df <- transform(
  air.df,
  Airline = factor(Airline),
  Aircraft = factor(Aircraft),
  TravelMonth = factor(TravelMonth),
  IsInternational = factor(IsInternational)
)

Plotting each variable individually:

# Bar charts of the level counts of each categorical variable
# (plot() on a factor draws one bar per level). Each chart is drawn once.
plot(air.df$Airline, main = "Count by airline")

plot(air.df$Aircraft, main = "Count by aircraft manufacturer")

plot(air.df$TravelMonth, main = "Count by travel month")

plot(air.df$IsInternational, main = "Count by domestic / international")

flight duration

# Distribution of flight duration.
hist(
  air.df[["FlightDuration"]],
  main = " Flight Duration",
  xlab = "Flight Duration",
  ylab = "Frequency"
)

number of seats

# Distribution of the number of economy seats.
hist(
  air.df[["SeatsEconomy"]],
  main = "number of seats of economy class",
  xlab = "Number of Economy Seat",
  ylab = "Frequency"
)

# Distribution of the number of premium seats.
hist(
  air.df[["SeatsPremium"]],
  main = " number of seats of premium class",
  xlab = "Seat in Premium class",
  ylab = "Frequency"
)

pitch

# Distribution of seat pitch in economy class.
hist(
  air.df[["PitchEconomy"]],
  main = " pitch of economy class seats",
  xlab = "Pitch of economy class seats",
  ylab = "Frequency"
)

# Distribution of seat pitch in premium class.
hist(
  air.df[["PitchPremium"]],
  main = "pitch of premium class seats",
  xlab = "Pitch of premium class seats",
  ylab = "Frequency"
)

width of seats

# Distribution of seat width in economy class.
hist(
  air.df$WidthEconomy,
  main = "Distribution of width of economic class seats",
  xlab = "Width of Economy class seats",
  ylab = "Frequency"
)

# Distribution of seat width in premium class.
# The x-axis label names the plotted variable (premium seat width).
hist(
  air.df$WidthPremium,
  main = "Distribution of width of premium class seats",
  xlab = "Width of premium class seats",
  ylab = "Frequency"
)

price of seats

# Distribution of premium-class prices.
hist(
  air.df[["PricePremium"]],
  main = "Distribution of price in premium class seats",
  xlab = "Price of premium class seats",
  ylab = "Frequency"
)

# Distribution of economy-class prices.
hist(
  air.df[["PriceEconomy"]],
  main = "Distribution of price in economy class seats",
  xlab = "Price of economy class seats",
  ylab = "Frequency"
)

# Distribution of the relative price difference between the two classes.
hist(
  air.df[["PriceRelative"]],
  main = "Distribution of Relative price",
  xlab = "Relative(PricePremium - PriceEconomy) / PriceEconomy",
  ylab = "Frequency"
)

total seats

# Distribution of the total seat count.
hist(
  air.df[["SeatsTotal"]],
  main = "Distribution of total seats(Economy + premium)",
  xlab = "Total seats",
  ylab = "Frequency"
)

pitch difference

# Distribution of the pitch difference column.
hist(
  air.df[["PitchDifference"]],
  main = "Distribution of Pitch difference",
  xlab = "Pitch difference",
  ylab = "Frequency"
)

width difference and percentage of premium seats

# Distribution of the width difference column.
hist(
  air.df[["WidthDifference"]],
  main = "Distribution of Width difference",
  xlab = "Width difference",
  ylab = "Frequency"
)

# Distribution of the percentage of premium seats.
hist(
  air.df[["PercentPremiumSeats"]],
  main = "Distribution of Percentage of premium seats",
  xlab = "Percentage of premium seats",
  ylab = "Frequency"
)

Plotting a corrgram to visualise the correlations between the variables:

library(corrgram)

# Corrgram of the data set: shaded cells below the diagonal, pie glyphs above
# it, variable names on the diagonal. order = TRUE lets corrgram reorder the
# variables instead of using column order.
corrgram(
  air.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of airline variables"
)

Generating correlation matrix:

# Correlation matrix of every numeric column. Columns are selected by type
# rather than by position, so the call keeps working if columns are added or
# reordered in the CSV. Filter() on a data frame returns a data frame, so the
# result is never dropped to a bare vector.
cor(Filter(is.numeric, air.df))
##                     FlightDuration SeatsEconomy SeatsPremium PitchEconomy
## FlightDuration          1.00000000  0.195621187  0.161236400   0.29377174
## SeatsEconomy            0.19562119  1.000000000  0.625056587   0.14412692
## SeatsPremium            0.16123640  0.625056587  1.000000000  -0.03421296
## PitchEconomy            0.29377174  0.144126924 -0.034212963   1.00000000
## PitchPremium            0.09621471  0.119221250  0.004883123  -0.55060624
## WidthEconomy            0.45647720  0.373670252  0.455782883   0.29448586
## WidthPremium            0.10343747  0.102431959 -0.002717527  -0.53929285
## PriceEconomy            0.56664039  0.128167220  0.113642176   0.36866123
## PricePremium            0.64873981  0.177000928  0.217612376   0.22614179
## PriceRelative           0.12107501  0.003956939 -0.097196009  -0.42302204
## SeatsTotal              0.20023299  0.992607966  0.715171053   0.12373524
## PitchDifference        -0.03749288  0.035318044  0.016365566  -0.78254993
## WidthDifference        -0.11856070 -0.080670148 -0.216168666  -0.63557430
## PercentPremiumSeats     0.06051625 -0.330935223  0.485029771  -0.10280880
##                     PitchPremium WidthEconomy WidthPremium PriceEconomy
## FlightDuration       0.096214708   0.45647720  0.103437469   0.56664039
## SeatsEconomy         0.119221250   0.37367025  0.102431959   0.12816722
## SeatsPremium         0.004883123   0.45578288 -0.002717527   0.11364218
## PitchEconomy        -0.550606241   0.29448586 -0.539292852   0.36866123
## PitchPremium         1.000000000  -0.02374087  0.750259029   0.05038455
## WidthEconomy        -0.023740873   1.00000000  0.081918728   0.06799061
## WidthPremium         0.750259029   0.08191873  1.000000000  -0.05704522
## PriceEconomy         0.050384550   0.06799061 -0.057045224   1.00000000
## PricePremium         0.088539147   0.15054837  0.064020043   0.90138870
## PriceRelative        0.417539056  -0.04396116  0.504247591  -0.28856711
## SeatsTotal           0.107512784   0.40545860  0.091297500   0.13243313
## PitchDifference      0.950591466  -0.12722421  0.760121272  -0.09952511
## WidthDifference      0.703281797  -0.39320512  0.884149655  -0.08449975
## PercentPremiumSeats -0.175487414   0.22714172 -0.183312058   0.06532232
##                     PricePremium PriceRelative  SeatsTotal PitchDifference
## FlightDuration        0.64873981   0.121075014  0.20023299     -0.03749288
## SeatsEconomy          0.17700093   0.003956939  0.99260797      0.03531804
## SeatsPremium          0.21761238  -0.097196009  0.71517105      0.01636557
## PitchEconomy          0.22614179  -0.423022038  0.12373524     -0.78254993
## PitchPremium          0.08853915   0.417539056  0.10751278      0.95059147
## WidthEconomy          0.15054837  -0.043961160  0.40545860     -0.12722421
## WidthPremium          0.06402004   0.504247591  0.09129750      0.76012127
## PriceEconomy          0.90138870  -0.288567110  0.13243313     -0.09952511
## PricePremium          1.00000000   0.031846537  0.19232533     -0.01806629
## PriceRelative         0.03184654   1.000000000 -0.01156894      0.46873025
## SeatsTotal            0.19232533  -0.011568942  1.00000000      0.03416915
## PitchDifference      -0.01806629   0.468730249  0.03416915      1.00000000
## WidthDifference      -0.01151218   0.485802437 -0.10584398      0.76089108
## PercentPremiumSeats   0.11639097  -0.161565556 -0.22091465     -0.09264869
##                     WidthDifference PercentPremiumSeats
## FlightDuration          -0.11856070          0.06051625
## SeatsEconomy            -0.08067015         -0.33093522
## SeatsPremium            -0.21616867          0.48502977
## PitchEconomy            -0.63557430         -0.10280880
## PitchPremium             0.70328180         -0.17548741
## WidthEconomy            -0.39320512          0.22714172
## WidthPremium             0.88414965         -0.18331206
## PriceEconomy            -0.08449975          0.06532232
## PricePremium            -0.01151218          0.11639097
## PriceRelative            0.48580244         -0.16156556
## SeatsTotal              -0.10584398         -0.22091465
## PitchDifference          0.76089108         -0.09264869
## WidthDifference          1.00000000         -0.27559416
## PercentPremiumSeats     -0.27559416          1.00000000

Implementing regression for the price of economy class:

# Linear model of economy price on flight duration, seat counts, pitch/width
# differences and premium price.
# NOTE(review): terms are written as air.df$<column>, so coefficient labels
# carry the "air.df$" prefix; passing data = air.df would shorten them but
# change those labels.
fit1 <- lm(
  air.df$PriceEconomy ~
    air.df$FlightDuration +
    air.df$SeatsEconomy +
    air.df$SeatsPremium +
    air.df$PitchDifference +
    air.df$WidthDifference +
    air.df$PricePremium
)

Getting summary stats of regression analysis:

# Print the regression summary for fit1: residual quantiles, coefficient
# table with p-values, residual standard error, R-squared and F-statistic.
summary(fit1)
## 
## Call:
## lm(formula = air.df$PriceEconomy ~ air.df$FlightDuration + air.df$SeatsEconomy + 
##     air.df$SeatsPremium + air.df$PitchDifference + air.df$WidthDifference + 
##     air.df$PricePremium)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2365.36  -217.56    14.31   160.67   912.01 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            469.97042  101.91927   4.611 5.22e-06 ***
## air.df$FlightDuration  -14.65160    7.29161  -2.009  0.04509 *  
## air.df$SeatsEconomy      0.60660    0.32494   1.867  0.06258 .  
## air.df$SeatsPremium    -10.16913    1.98929  -5.112 4.72e-07 ***
## air.df$PitchDifference  -4.56145   17.67483  -0.258  0.79647    
## air.df$WidthDifference -82.47864   27.17490  -3.035  0.00254 ** 
## air.df$PricePremium      0.73312    0.02011  36.461  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 408.6 on 451 degrees of freedom
## Multiple R-squared:  0.8313, Adjusted R-squared:  0.829 
## F-statistic: 370.3 on 6 and 451 DF,  p-value: < 2.2e-16

Getting the variables whose p-value is significant (below 0.05):

# Positions (named by term) of the fit1 coefficients with p-value below 0.05.
# The p-value column of the coefficient table is addressed by name.
which(coef(summary(fit1))[, "Pr(>|t|)"] < 0.05)
##            (Intercept)  air.df$FlightDuration    air.df$SeatsPremium 
##                      1                      2                      4 
## air.df$WidthDifference    air.df$PricePremium 
##                      6                      7

Getting the variables whose p-value is not significant (0.05 or above):

# Positions (named by term) of the fit1 coefficients that are NOT significant
# at the 5% level. Uses >= so that this list is the exact complement of the
# "p < 0.05" list; a p-value of exactly 0.05 is counted here instead of being
# dropped from both lists.
which(coef(summary(fit1))[, "Pr(>|t|)"] >= 0.05)
##    air.df$SeatsEconomy air.df$PitchDifference 
##                      3                      5

Getting confidence interval:

# 95% confidence intervals (2.5% and 97.5% bounds) for the fit1 coefficients.
confint(fit1, level = 0.95)
##                                2.5 %      97.5 %
## (Intercept)             269.67479442 670.2660411
## air.df$FlightDuration   -28.98135553  -0.3218542
## air.df$SeatsEconomy      -0.03198731   1.2451917
## air.df$SeatsPremium     -14.07856714  -6.2597021
## air.df$PitchDifference  -39.29670094  30.1737960
## air.df$WidthDifference -135.88377697 -29.0735063
## air.df$PricePremium       0.69360554   0.7726346

Getting coefficients:

# Named vector of the estimated fit1 coefficients.
coef(fit1)
##            (Intercept)  air.df$FlightDuration    air.df$SeatsEconomy 
##            469.9704178            -14.6516049              0.6066022 
##    air.df$SeatsPremium air.df$PitchDifference air.df$WidthDifference 
##            -10.1691346             -4.5614525            -82.4786416 
##    air.df$PricePremium 
##              0.7331200

Implementing regression for the price of premium class:

# Linear model of premium price on flight duration, seat counts, pitch/width
# differences and economy price.
# NOTE(review): terms are written as air.df$<column>, so coefficient labels
# carry the "air.df$" prefix; passing data = air.df would shorten them but
# change those labels.
fit2 <- lm(
  air.df$PricePremium ~
    air.df$FlightDuration +
    air.df$SeatsEconomy +
    air.df$SeatsPremium +
    air.df$PitchDifference +
    air.df$WidthDifference +
    air.df$PriceEconomy
)

Getting summary stats of regression analysis:

# Print the regression summary for fit2: residual quantiles, coefficient
# table with p-values, residual standard error, R-squared and F-statistic.
summary(fit2)
## 
## Call:
## lm(formula = air.df$PricePremium ~ air.df$FlightDuration + air.df$SeatsEconomy + 
##     air.df$SeatsPremium + air.df$PitchDifference + air.df$WidthDifference + 
##     air.df$PriceEconomy)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -890.7 -256.1  -24.4  154.8 3545.4 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -473.51759  120.89029  -3.917 0.000104 ***
## air.df$FlightDuration    74.60808    7.88567   9.461  < 2e-16 ***
## air.df$SeatsEconomy      -0.82139    0.38253  -2.147 0.032305 *  
## air.df$SeatsPremium      15.21609    2.30282   6.608 1.10e-10 ***
## air.df$PitchDifference  -28.15622   20.79226  -1.354 0.176361    
## air.df$WidthDifference  149.55172   31.58030   4.736 2.93e-06 ***
## air.df$PriceEconomy       1.01851    0.02793  36.461  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 481.6 on 451 degrees of freedom
## Multiple R-squared:  0.862,  Adjusted R-squared:  0.8602 
## F-statistic: 469.6 on 6 and 451 DF,  p-value: < 2.2e-16

Getting the variables whose p-value is significant (below 0.05):

# Positions (named by term) of the fit2 coefficients with p-value below 0.05.
# The p-value column of the coefficient table is addressed by name.
which(coef(summary(fit2))[, "Pr(>|t|)"] < 0.05)
##            (Intercept)  air.df$FlightDuration    air.df$SeatsEconomy 
##                      1                      2                      3 
##    air.df$SeatsPremium air.df$WidthDifference    air.df$PriceEconomy 
##                      4                      6                      7

Getting the variables whose p-value is not significant (0.05 or above):

# Positions (named by term) of the fit2 coefficients that are NOT significant
# at the 5% level. Uses >= so that this list is the exact complement of the
# "p < 0.05" list; a p-value of exactly 0.05 is counted here instead of being
# dropped from both lists.
which(coef(summary(fit2))[, "Pr(>|t|)"] >= 0.05)
## air.df$PitchDifference 
##                      5

Getting confidence interval:

# 95% confidence intervals (2.5% and 97.5% bounds) for the fit2 coefficients.
confint(fit2, level = 0.95)
##                               2.5 %        97.5 %
## (Intercept)            -711.0957670 -235.93941844
## air.df$FlightDuration    59.1108533   90.10530535
## air.df$SeatsEconomy      -1.5731505   -0.06962792
## air.df$SeatsPremium      10.6905016   19.74167400
## air.df$PitchDifference  -69.0179491   12.70551341
## air.df$WidthDifference   87.4889300  211.61451794
## air.df$PriceEconomy       0.9636146    1.07340835

Getting coefficients:

# Named vector of the estimated fit2 coefficients.
coef(fit2)
##            (Intercept)  air.df$FlightDuration    air.df$SeatsEconomy 
##           -473.5175927             74.6080793             -0.8213892 
##    air.df$SeatsPremium air.df$PitchDifference air.df$WidthDifference 
##             15.2160878            -28.1562179            149.5517240 
##    air.df$PriceEconomy 
##              1.0185115

Flight duration is positively related to the premium price (coefficient: 74.6080793).

The number of economy seats is negatively related to the premium price (coefficient: -0.8213892).

The number of premium seats is positively related to the premium price (coefficient: 15.2160878).

A longer flight duration means a greater distance, and that means a higher cost.

If more seats are available in economy, more people try to fit in there, so the price of the premium class needs to be decreased.

Wider seats mean fewer seats in the same area, and therefore a higher cost per seat.