# Load the airline fare data set; the path is relative to the working directory.
# stringsAsFactors = TRUE keeps the text columns (Airline, Aircraft,
# TravelMonth, IsInternational) as factors on R >= 4.0 as well, which the
# per-level counts printed by summary() and the xtabs()/barplot() calls rely on.
df <- read.csv("SixAirlines.csv", stringsAsFactors = TRUE)

# Per-column overview: level counts for factors, five-number summary plus mean
# for numeric columns.
summary(df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69
# View() opens a spreadsheet-style data viewer, which is only useful (and may
# only be available) in an interactive session, so skip it when the script is
# run in batch mode or knitted.
if (interactive()) {
  View(df)
}

# Compact listing of every column with its type and first values.
str(df)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...
# Count flights per airline (rows) and travel month (columns).
data1 <- xtabs(~ Airline + TravelMonth, data = df)

# Grouped bar chart: one cluster per month, one bar per airline.
# `legend.text` is spelled out in full instead of relying on partial matching
# of `legend`.
barplot(
  data1,
  main = "distribution of Months and airlines",
  xlab = "months",
  ylab = "frequency",
  col = c("blue", "yellow", "red", "lightblue", "green", "magenta"),
  ylim = c(0, 140),
  legend.text = rownames(data1),
  beside = TRUE
)

We can see that British Airways has a high number of flights in every month. We can also see that the data contain more flights for August, September and October (127–129 each) than for July (75).

# Count flights per airline (rows) and aircraft manufacturer (columns).
data2 <- xtabs(~ Airline + Aircraft, data = df)

# Grouped bar chart: one cluster per manufacturer, one bar per airline.
# The legend labels come from data2 itself (not from the month table), and the
# y-axis keeps the original 0-140 range but grows if any count exceeds it so
# that no bar runs past the axis.
barplot(
  data2,
  main = "distribution of Aircrafts and Airlines",
  xlab = "Aircrafts",
  ylab = "Count",
  col = c("blue", "yellow", "red", "lightblue", "green", "magenta"),
  ylim = c(0, max(140, data2)),
  legend.text = rownames(data2),
  beside = TRUE
)

It is apparent from the visualization that almost all airlines use Boeing aircraft more often than Airbus aircraft.

# Columns are referenced explicitly through df$... instead of attach(df):
# attach() puts a copy of the columns on the search path, which can silently
# mask or be masked by other objects with the same names.

# Horizontal box plot of flight duration, with outliers drawn.
boxplot(
  df$FlightDuration,
  main = "Flight Duration",
  xlab = "Duration",
  outline = TRUE,
  col = "lightblue",
  horizontal = TRUE
)

# Number of records for each level of IsInternational.
barplot(
  table(df$IsInternational),
  xlab = "Type of Flight",
  ylab = "Count",
  main = "Data Available by Type of Flight",
  col = c("magenta", "lightblue")
)

# Mean economy and premium ticket price for each airline.
data3 <- aggregate(
  x = df[c("PriceEconomy", "PricePremium")],
  by = list(Airline = df$Airline),
  FUN = mean
)
print(data3)
##     Airline PriceEconomy PricePremium
## 1 AirFrance    2769.7838    3065.2162
## 2   British    1293.4800    1937.0286
## 3     Delta     560.9348     684.6739
## 4       Jet     276.1639     483.3607
## 5 Singapore     860.2500    1239.9250
## 6    Virgin    1603.5323    2721.6935
# Mean flight duration and mean premium ticket price for each airline.
data4 <- aggregate(
  x = df[c("FlightDuration", "PricePremium")],
  by = list(Airline = df$Airline),
  FUN = mean
)
print(data4)
##     Airline FlightDuration PricePremium
## 1 AirFrance       8.988514    3065.2162
## 2   British       7.854971    1937.0286
## 3     Delta       4.028913     684.6739
## 4       Jet       4.143934     483.3607
## 5 Singapore      10.481000    1239.9250
## 6    Virgin       9.250484    2721.6935
library(lattice)

# Horizontal lattice bar chart with one row per airline and two bars per row:
# the mean economy price and the mean premium price from data3.
barchart(
  Airline ~ PriceEconomy + PricePremium,
  data = data3,
  auto.key = TRUE
)

library(lattice)

# Horizontal lattice bar chart with one row per airline and two bars per row:
# the mean flight duration and the mean premium price from data4.
barchart(
  Airline ~ FlightDuration + PricePremium,
  data = data4,
  auto.key = TRUE
)

# Scatter plot of premium-class price against flight duration.
plot(
  df$FlightDuration,
  df$PricePremium,
  xlab = "FlightDuration",
  ylab = "PricePremium",
  main = "Duration vs Premiumclass price"
)

# Overlay the least-squares line for the variable actually plotted
# (PricePremium); the line was previously fitted on PriceEconomy, so it did
# not describe the points shown.
abline(lm(PricePremium ~ FlightDuration, data = df), col = "blue")

# Scatter plot of economy-class price against flight duration.
plot(
  x = df$FlightDuration,
  y = df$PriceEconomy,
  main = "Duration vs Economyclass price",
  xlab = "FlightDuration",
  ylab = "PriceEconomy"
)

# Overlay the least-squares fit of economy price on flight duration.
economy_fit <- lm(df$PriceEconomy ~ df$FlightDuration)
abline(economy_fit, col = "blue")

# Horizontal box plot of the PriceRelative column.
# The fill colour argument of boxplot() is `col`; `color` is not a recognised
# argument, so the box was not filled with the requested colour.
boxplot(
  df$PriceRelative,
  xlab = "Prices",
  ylab = "Price Relative",
  main = "Relative prices",
  horizontal = TRUE,
  col = "lightblue"
)

library(corrgram)

# Correlogram of the data frame's columns, kept in their original order:
# shaded cells below the diagonal, pie glyphs above it, and each variable's
# name with its min/max on the diagonal.
corrgram(
  df,
  main = "Corrgram of plane.df intercorrelations",
  order = FALSE,
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  text.panel = panel.txt,
  diag.panel = panel.minmax
)

## T-Tests #### Null hypothesis: The number of premium seats has no significant effect on the price of premium seats

# Paired t-test on SeatsPremium and PricePremium: tests whether the mean of
# the row-wise differences between the two columns is zero.
# NOTE(review): the two columns are a seat count and a price, so a difference
# in their means does not by itself show that one affects the other; a test of
# association such as cor.test() may be what the stated hypothesis needs --
# confirm the intent before relying on the conclusion.
t.test(df$SeatsPremium, df$PricePremium, paired = TRUE)
## 
##  Paired t-test
## 
## data:  df$SeatsPremium and df$PricePremium
## t = -30.164, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1929.635 -1693.583
## sample estimates:
## mean of the differences 
##               -1811.609

Conclusion: We can reject the null hypothesis.

Null hypothesis: The number of premium seats has no significant effect on the price of economy seats

# Paired t-test on SeatsPremium and PriceEconomy: tests whether the mean of
# the row-wise differences between the two columns is zero.
# NOTE(review): the two columns are a seat count and a price, so a difference
# in their means does not by itself show that one affects the other; a test of
# association such as cor.test() may be what the stated hypothesis needs --
# confirm the intent before relying on the conclusion.
t.test(df$SeatsPremium, df$PriceEconomy, paired = TRUE)
## 
##  Paired t-test
## 
## data:  df$SeatsPremium and df$PriceEconomy
## t = -28.049, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1384.047 -1202.809
## sample estimates:
## mean of the differences 
##               -1293.428

Conclusion: We can reject the null hypothesis.

Building a linear regression model to predict the price of Economy class

# Linear model of economy price on the seat pitch/width differences, the share
# of premium seats, the total seat count and the flight duration.
lmFit <- lm(
  PriceEconomy ~ PitchDifference + WidthDifference + PercentPremiumSeats +
    SeatsTotal + FlightDuration,
  data = df
)

# Coefficient table, residual summary and overall fit statistics.
summary(lmFit)
## 
## Call:
## lm(formula = PriceEconomy ~ PitchDifference + WidthDifference + 
##     PercentPremiumSeats + SeatsTotal + FlightDuration, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1738.0  -507.2  -165.7   461.0  1802.6 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          282.6872   241.9238   1.168  0.24322    
## PitchDifference     -105.7543    34.7489  -3.043  0.00248 ** 
## WidthDifference      124.9332    54.2737   2.302  0.02179 *  
## PercentPremiumSeats   13.8679     8.7202   1.590  0.11246    
## SeatsTotal             0.6640     0.4855   1.368  0.17210    
## FlightDuration       156.7510    11.0095  14.238  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 809.5 on 452 degrees of freedom
## Multiple R-squared:  0.3364, Adjusted R-squared:  0.3291 
## F-statistic: 45.83 on 5 and 452 DF,  p-value: < 2.2e-16

The significant explanatory variables are PitchDifference, WidthDifference, and FlightDuration.

Regression model to predict the price of Premium class

# Linear model of premium price on the same five predictors used for the
# economy-price model.
Fit <- lm(
  PricePremium ~ PitchDifference + WidthDifference + PercentPremiumSeats +
    SeatsTotal + FlightDuration,
  data = df
)

# Coefficient table, residual summary and overall fit statistics.
summary(Fit)
## 
## Call:
## lm(formula = PricePremium ~ PitchDifference + WidthDifference + 
##     PercentPremiumSeats + SeatsTotal + FlightDuration, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2331.5  -573.4   -48.2   619.7  4540.2 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -591.3212   284.9959  -2.075 0.038567 *  
## PitchDifference     -134.5022    40.9356  -3.286 0.001096 ** 
## WidthDifference      285.0390    63.9366   4.458 1.04e-05 ***
## PercentPremiumSeats   43.4230    10.2727   4.227 2.87e-05 ***
## SeatsTotal             2.0412     0.5719   3.569 0.000397 ***
## FlightDuration       231.3300    12.9697  17.836  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 953.6 on 452 degrees of freedom
## Multiple R-squared:  0.458,  Adjusted R-squared:  0.452 
## F-statistic: 76.38 on 5 and 452 DF,  p-value: < 2.2e-16

In contrast to the model we built above to predict the price of economy tickets, this model shows that all explanatory variables have a significant effect on predicting the price of a premium ticket.