# Load the six-airlines data set and print a per-column summary.
# stringsAsFactors = TRUE keeps the text columns as factors on R >= 4.0 too,
# which is what the recorded summary()/str() output in this script shows
# (Airline, Aircraft, TravelMonth and IsInternational are listed as factors).
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# The spreadsheet-style viewer is only opened in an interactive session so
# that the script can also be run non-interactively.
if (interactive()) {
  View(airlines.df)
}
# Summarize every column of the data frame
library(psych)
summary(airlines.df)
##       Airline      Aircraft   FlightDuration   TravelMonth
##  AirFrance: 74   AirBus:151   Min.   : 1.250   Aug:127    
##  British  :175   Boeing:307   1st Qu.: 4.260   Jul: 75    
##  Delta    : 46                Median : 7.790   Oct:127    
##  Jet      : 61                Mean   : 7.578   Sep:129    
##  Singapore: 40                3rd Qu.:10.620              
##  Virgin   : 62                Max.   :14.660              
##       IsInternational  SeatsEconomy    SeatsPremium    PitchEconomy  
##  Domestic     : 40    Min.   : 78.0   Min.   : 8.00   Min.   :30.00  
##  International:418    1st Qu.:133.0   1st Qu.:21.00   1st Qu.:31.00  
##                       Median :185.0   Median :36.00   Median :31.00  
##                       Mean   :202.3   Mean   :33.65   Mean   :31.22  
##                       3rd Qu.:243.0   3rd Qu.:40.00   3rd Qu.:32.00  
##                       Max.   :389.0   Max.   :66.00   Max.   :33.00  
##   PitchPremium    WidthEconomy    WidthPremium    PriceEconomy 
##  Min.   :34.00   Min.   :17.00   Min.   :17.00   Min.   :  65  
##  1st Qu.:38.00   1st Qu.:18.00   1st Qu.:19.00   1st Qu.: 413  
##  Median :38.00   Median :18.00   Median :19.00   Median :1242  
##  Mean   :37.91   Mean   :17.84   Mean   :19.47   Mean   :1327  
##  3rd Qu.:38.00   3rd Qu.:18.00   3rd Qu.:21.00   3rd Qu.:1909  
##  Max.   :40.00   Max.   :19.00   Max.   :21.00   Max.   :3593  
##   PricePremium    PriceRelative      SeatsTotal  PitchDifference 
##  Min.   :  86.0   Min.   :0.0200   Min.   : 98   Min.   : 2.000  
##  1st Qu.: 528.8   1st Qu.:0.1000   1st Qu.:166   1st Qu.: 6.000  
##  Median :1737.0   Median :0.3650   Median :227   Median : 7.000  
##  Mean   :1845.3   Mean   :0.4872   Mean   :236   Mean   : 6.688  
##  3rd Qu.:2989.0   3rd Qu.:0.7400   3rd Qu.:279   3rd Qu.: 7.000  
##  Max.   :7414.0   Max.   :1.8900   Max.   :441   Max.   :10.000  
##  WidthDifference PercentPremiumSeats
##  Min.   :0.000   Min.   : 4.71      
##  1st Qu.:1.000   1st Qu.:12.28      
##  Median :1.000   Median :13.21      
##  Mean   :1.633   Mean   :14.65      
##  3rd Qu.:3.000   3rd Qu.:15.36      
##  Max.   :4.000   Max.   :24.69
# Show the data type and the leading values of every column
str(object = airlines.df)
## 'data.frame':    458 obs. of  18 variables:
##  $ Airline            : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ Aircraft           : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FlightDuration     : num  12.25 12.25 12.25 12.25 8.16 ...
##  $ TravelMonth        : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
##  $ IsInternational    : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SeatsEconomy       : int  122 122 122 122 122 122 122 122 122 122 ...
##  $ SeatsPremium       : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ PitchEconomy       : int  31 31 31 31 31 31 31 31 31 31 ...
##  $ PitchPremium       : int  38 38 38 38 38 38 38 38 38 38 ...
##  $ WidthEconomy       : int  18 18 18 18 18 18 18 18 18 18 ...
##  $ WidthPremium       : int  19 19 19 19 19 19 19 19 19 19 ...
##  $ PriceEconomy       : int  2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
##  $ PricePremium       : int  3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
##  $ PriceRelative      : num  0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
##  $ SeatsTotal         : int  162 162 162 162 162 162 162 162 162 162 ...
##  $ PitchDifference    : int  7 7 7 7 7 7 7 7 7 7 ...
##  $ WidthDifference    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ PercentPremiumSeats: num  24.7 24.7 24.7 24.7 24.7 ...
# Pie charts of the categorical columns, one chart per page.
# A single layout reset is enough: nothing below changes mfrow again.
par(mfrow = c(1, 1))

# Share of observations per airline
pie(
  table(airlines.df$Airline),
  col = c("Violet", "blue", "green", "yellow", "red", "purple"),
  main = "Airline type by country"
)

# Share of observations per aircraft manufacturer
pie(
  table(airlines.df$Aircraft),
  col = c("blue", "red"),
  main = "Aircraft manufacturer by company "
)

# Share of domestic vs. international observations
# (title previously misspelled as "Domesic")
pie(
  table(airlines.df$IsInternational),
  col = c("yellow", "purple"),
  main = "Domestic/International "
)

# Share of observations per travel month
pie(
  table(airlines.df$TravelMonth),
  col = c("orange", "purple", "blue", "red"),
  main = "Analysing peak months"
)

# Box plot of flight duration, one box per airline
boxplot(
  FlightDuration ~ Airline,
  data = airlines.df,
  col = c("green", "blue", " red", "grey", "cyan", "purple"),
  xlab = "Airline",
  ylab = "Flight duration"
)

# Frequency histograms comparing economy (left) and premium (right) seats.
# Each pair is drawn side by side on its own 1 x 2 page.

# Seat pitch
par(mfrow = c(1, 2))
hist(
  airlines.df[["PitchEconomy"]],
  main = "Economy class ",
  xlab = "Economy Seats Pitch",
  col = "red"
)
hist(
  airlines.df[["PitchPremium"]],
  main = "Premium class ",
  xlab = "Premium Seats Pitch",
  col = "yellow"
)

# Seat width
par(mfrow = c(1, 2))
hist(
  airlines.df[["WidthEconomy"]],
  main = "Economy class",
  xlab = "Economy Seats Width",
  col = " blue"
)
hist(
  airlines.df[["WidthPremium"]],
  main = "Premium class",
  xlab = "Premium Seats Width",
  col = " green"
)

# Seat price
par(mfrow = c(1, 2))
hist(
  airlines.df[["PriceEconomy"]],
  main = "Economy class",
  xlab = "Economy Seats Price",
  col = "navy blue"
)
hist(
  airlines.df[["PricePremium"]],
  main = "Premium class",
  xlab = "Premium Seats Price",
  col = "dark green"
)

#To plot Scatter plot #
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
library(lattice)
# Back to one plot per page for the scatter plots
par(mfrow = c(1, 1))

# Number of economy seats (x) against economy price (y)
scatterplot(
  airlines.df$SeatsEconomy, airlines.df$PriceEconomy,
  main = "scatterplot of pricing of no. of seats with price of economy class",
  xlab = "Seats Economy", ylab = "Price Economy "
)

# Number of premium seats (x) against premium price (y)
# (y-axis label previously misspelled as "Pricde Premium")
scatterplot(
  airlines.df$SeatsPremium, airlines.df$PricePremium,
  main = "scatterplot of pricing of no. of seats with price of premium economy class",
  xlab = "Seats Premium", ylab = "Price Premium"
)

# Relative price (x) against total number of seats (y)
scatterplot(
  airlines.df$PriceRelative, airlines.df$SeatsTotal,
  main = "scatter plot of  Price Relative and Seats Total available",
  xlab = "Price Relative", ylab = "Seats Total"
)

# Relative price (x) against seat width difference (y).
# The axis labels used to be swapped ("Width" on x, "Price" on y) although
# PriceRelative is plotted on x and WidthDifference on y.
scatterplot(
  airlines.df$PriceRelative, airlines.df$WidthDifference,
  main = "scatterplot Price Relative and Width Difference",
  xlab = "Price Relative", ylab = "Width Difference"
)

# Relative price (x) against seat pitch difference (y).
# Same correction: labels now name the column actually drawn on each axis.
scatterplot(
  airlines.df$PriceRelative, airlines.df$PitchDifference,
  main = "scatter plot of PriceRelative and Pitch Difference",
  xlab = "Price Relative", ylab = "Pitch Difference"
)

# Side-by-side box plots of the seat counts: economy (left), premium (right)
par(mfrow = c(1, 2))
boxplot(
  airlines.df[["SeatsEconomy"]],
  col = "blue",
  xlab = "Seats Economy",
  main = "pricing of no. of Seats for Economy Class"
)
boxplot(
  airlines.df[["SeatsPremium"]],
  col = " dark green",
  xlab = "Seats Premium",
  main = "pricing of no. of Seats for Premium Class"
)

# Mean and standard deviation of the numeric seat and price columns.
# The "## [1] ..." lines are the recorded console output of each call.

# Seats, economy
mean(airlines.df[["SeatsEconomy"]])
## [1] 202.3122
sd(airlines.df[["SeatsEconomy"]])
## [1] 76.37353
# Seats, premium
mean(airlines.df[["SeatsPremium"]])
## [1] 33.64847
sd(airlines.df[["SeatsPremium"]])
## [1] 13.26142
# Pitch, economy
mean(airlines.df[["PitchEconomy"]])
## [1] 31.21834
sd(airlines.df[["PitchEconomy"]])
## [1] 0.6551695
# Pitch, premium
mean(airlines.df[["PitchPremium"]])
## [1] 37.90611
sd(airlines.df[["PitchPremium"]])
## [1] 1.313924
# Width, economy
mean(airlines.df[["WidthEconomy"]])
## [1] 17.83843
sd(airlines.df[["WidthEconomy"]])
## [1] 0.5575102
# Width, premium
mean(airlines.df[["WidthPremium"]])
## [1] 19.47162
sd(airlines.df[["WidthPremium"]])
## [1] 1.097173
# Price, economy
mean(airlines.df[["PriceEconomy"]])
## [1] 1327.076
sd(airlines.df[["PriceEconomy"]])
## [1] 988.2733
# Price, premium
mean(airlines.df[["PricePremium"]])
## [1] 1845.258
sd(airlines.df[["PricePremium"]])
## [1] 1288.136
# Price, relative
mean(airlines.df[["PriceRelative"]])
## [1] 0.4872052
sd(airlines.df[["PriceRelative"]])
## [1] 0.4505873
#corrgram of airline industry#
library(corrgram)

# Palette function interpolating yellow -> skyblue -> red -> darkgreen;
# it is handed to corrgram() as the colour scheme for the panels.
cols <- colorRampPalette(
  colors = c("yellow", "skyblue", "red", "darkgreen")
)
# Corrgram of the data frame: shaded cells below the diagonal, pies above,
# variable names on the diagonal.
corrgram(
  airlines.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  col.regions = cols,
  main = "Corrgram of  Airlines Industry Analysis"
)

# Two-sample t-test of economy price against premium price
# NOTE(review): both prices appear to come from the same rows, so a paired
# test may be the better fit -- confirm before relying on this result.
t.test(
  x = airlines.df$PriceEconomy,
  y = airlines.df$PricePremium
)
## 
##  Welch Two Sample t-test
## 
## data:  airlines.df$PriceEconomy and airlines.df$PricePremium
## t = -6.8304, df = 856.56, p-value = 1.605e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -667.0831 -369.2793
## sample estimates:
## mean of x mean of y 
##  1327.076  1845.258
# Correlation test between economy price and premium price
cor.test(
  x = airlines.df$PriceEconomy,
  y = airlines.df$PricePremium
)
## 
##  Pearson's product-moment correlation
## 
## data:  airlines.df$PriceEconomy and airlines.df$PricePremium
## t = 44.452, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8826622 0.9172579
## sample estimates:
##       cor 
## 0.9013887
# Linear regression of premium price on airline, travel month, flight
# duration, seat differences, premium seat share and relative price
rmodel <- lm(
  formula = PricePremium ~ Airline + TravelMonth + FlightDuration +
    PitchDifference + WidthDifference + PercentPremiumSeats + PriceRelative,
  data = airlines.df
)
summary(rmodel)
## 
## Call:
## lm(formula = PricePremium ~ Airline + TravelMonth + FlightDuration + 
##     PitchDifference + WidthDifference + PercentPremiumSeats + 
##     PriceRelative, data = airlines.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2216.1  -408.6   102.1   392.2  4277.1 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           624.427    534.907   1.167   0.2437    
## AirlineBritish       -972.777    218.739  -4.447 1.10e-05 ***
## AirlineDelta        -1133.805    265.671  -4.268 2.42e-05 ***
## AirlineJet          -2497.060    241.586 -10.336  < 2e-16 ***
## AirlineSingapore    -2077.175    166.680 -12.462  < 2e-16 ***
## AirlineVirgin       -1026.230    210.448  -4.876 1.51e-06 ***
## TravelMonthJul         78.482    111.257   0.705   0.4809    
## TravelMonthOct        -39.008     94.984  -0.411   0.6815    
## TravelMonthSep         -4.181     94.494  -0.044   0.9647    
## FlightDuration        187.775     12.780  14.693  < 2e-16 ***
## PitchDifference        25.189    116.907   0.215   0.8295    
## WidthDifference       259.734    157.784   1.646   0.1004    
## PercentPremiumSeats    15.601      9.352   1.668   0.0960 .  
## PriceRelative         234.797    100.289   2.341   0.0197 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 755.4 on 444 degrees of freedom
## Multiple R-squared:  0.6658, Adjusted R-squared:  0.6561 
## F-statistic: 68.05 on 13 and 444 DF,  p-value: < 2.2e-16

# From the above analysis we can say that it is a good model, as the p-value of its overall F-test (< 2.2e-16) is less than 0.05.

# Print the fitted coefficients of the regression model
rmodel[["coefficients"]]
##         (Intercept)      AirlineBritish        AirlineDelta 
##          624.426971         -972.777431        -1133.805429 
##          AirlineJet    AirlineSingapore       AirlineVirgin 
##        -2497.059506        -2077.174783        -1026.229834 
##      TravelMonthJul      TravelMonthOct      TravelMonthSep 
##           78.481517          -39.007936           -4.181169 
##      FlightDuration     PitchDifference     WidthDifference 
##          187.774965           25.188610          259.734439 
## PercentPremiumSeats       PriceRelative 
##           15.600923          234.797335