# Load the airline fare data set and print per-column summary statistics.
# stringsAsFactors = TRUE keeps the text columns as factors on every R
# version (the read.csv default changed to FALSE in R 4.0.0); the factor
# level counts in the recorded summary below depend on it.
df <- read.csv("SixAirlines.csv", stringsAsFactors = TRUE)
summary(df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# Inspect the data: open the spreadsheet-style viewer only when someone is at
# the console (View() launches a GUI data viewer, which is unavailable or
# pointless in batch runs), then print the structure of every column.
if (interactive()) {
  View(df)
}
str(df)
## 'data.frame': 458 obs. of 18 variables:
## $ Airline : Factor w/ 6 levels "AirFrance","British",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Aircraft : Factor w/ 2 levels "AirBus","Boeing": 2 2 2 2 2 2 2 2 2 2 ...
## $ FlightDuration : num 12.25 12.25 12.25 12.25 8.16 ...
## $ TravelMonth : Factor w/ 4 levels "Aug","Jul","Oct",..: 2 1 4 3 1 4 3 1 4 4 ...
## $ IsInternational : Factor w/ 2 levels "Domestic","International": 2 2 2 2 2 2 2 2 2 2 ...
## $ SeatsEconomy : int 122 122 122 122 122 122 122 122 122 122 ...
## $ SeatsPremium : int 40 40 40 40 40 40 40 40 40 40 ...
## $ PitchEconomy : int 31 31 31 31 31 31 31 31 31 31 ...
## $ PitchPremium : int 38 38 38 38 38 38 38 38 38 38 ...
## $ WidthEconomy : int 18 18 18 18 18 18 18 18 18 18 ...
## $ WidthPremium : int 19 19 19 19 19 19 19 19 19 19 ...
## $ PriceEconomy : int 2707 2707 2707 2707 1793 1793 1793 1476 1476 1705 ...
## $ PricePremium : int 3725 3725 3725 3725 2999 2999 2999 2997 2997 2989 ...
## $ PriceRelative : num 0.38 0.38 0.38 0.38 0.67 0.67 0.67 1.03 1.03 0.75 ...
## $ SeatsTotal : int 162 162 162 162 162 162 162 162 162 162 ...
## $ PitchDifference : int 7 7 7 7 7 7 7 7 7 7 ...
## $ WidthDifference : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PercentPremiumSeats: num 24.7 24.7 24.7 24.7 24.7 ...
# Cross-tabulate airline by travel month and draw the counts as grouped bars:
# one cluster per month, one bar per airline within each cluster.
data1 <- xtabs(~ Airline + TravelMonth, data = df)
barplot(
  height = data1,
  beside = TRUE,
  col = c("blue", "yellow", "red", "lightblue", "green", "magenta"),
  legend.text = rownames(data1),
  ylim = c(0, 140),
  main = "distribution of Months and airlines",
  xlab = "months",
  ylab = "frequency"
)
# Cross-tabulate airline by aircraft maker and draw the counts as grouped
# bars: one cluster per maker, one bar per airline within each cluster.
data2 <- xtabs(~ Airline + Aircraft, data = df)
barplot(
  data2,
  main = "distribution of Aircrafts and Airlines",
  xlab = "Aircrafts",
  ylab = "Count",
  col = c("blue", "yellow", "red", "lightblue", "green", "magenta"),
  # Keep the 0-140 axis but extend it when a count is larger, so that no
  # bar is clipped at the top of the plot.
  ylim = c(0, max(140, data2)),
  # Legend labels come from the table being plotted, not from another table.
  legend.text = rownames(data2),
  beside = TRUE
)
# Horizontal box plot of flight durations, followed by a bar chart of the
# number of records per flight type. Columns are referenced explicitly
# through df$ instead of attach(df), so no column names are placed on the
# search path where they could mask, or be masked by, other objects.
boxplot(
  df$FlightDuration,
  main = "Flight Duration",
  xlab = "Duration",
  outline = TRUE,
  col = "lightblue",
  horizontal = TRUE
)
barplot(
  table(df$IsInternational),
  xlab = "Type of Flight",
  ylab = "Count",
  main = "Data Available by Type of Flight",
  col = c("magenta", "lightblue")
)
# Mean economy fare and mean premium fare for each airline.
data3 <- aggregate(
  df[, c("PriceEconomy", "PricePremium")],
  by = df["Airline"],
  FUN = mean
)
data3
## Airline PriceEconomy PricePremium
## 1 AirFrance 2769.7838 3065.2162
## 2 British 1293.4800 1937.0286
## 3 Delta 560.9348 684.6739
## 4 Jet 276.1639 483.3607
## 5 Singapore 860.2500 1239.9250
## 6 Virgin 1603.5323 2721.6935
# Mean flight duration and mean premium fare for each airline.
data4 <- aggregate(
  df[, c("FlightDuration", "PricePremium")],
  by = df["Airline"],
  FUN = mean
)
data4
## Airline FlightDuration PricePremium
## 1 AirFrance 8.988514 3065.2162
## 2 British 7.854971 1937.0286
## 3 Delta 4.028913 684.6739
## 4 Jet 4.143934 483.3607
## 5 Singapore 10.481000 1239.9250
## 6 Virgin 9.250484 2721.6935
# Lattice bar charts of the per-airline means computed above.
library("lattice")

# Economy vs premium mean fare, one pair of bars per airline.
barchart(
  Airline ~ PriceEconomy + PricePremium,
  data = data3,
  auto.key = TRUE
)

# Mean flight duration vs mean premium fare, one pair of bars per airline.
# NOTE(review): both series share one axis, so the duration values (a few
# hours) are dwarfed by the fares (hundreds to thousands).
barchart(
  Airline ~ FlightDuration + PricePremium,
  data = data4,
  auto.key = TRUE
)
# Scatter plot of premium fare against flight duration with its least-squares
# line. The line is fitted on PricePremium, the variable actually plotted on
# the y axis, so that it describes the points shown.
plot(
  df$FlightDuration, df$PricePremium,
  xlab = "FlightDuration",
  ylab = "PricePremium",
  main = "Duration vs Premiumclass price"
)
abline(lm(PricePremium ~ FlightDuration, data = df), col = "blue")
# Scatter plot of economy fare against flight duration, overlaid with the
# least-squares line of PriceEconomy on FlightDuration.
plot(
  x = df$FlightDuration,
  y = df$PriceEconomy,
  main = "Duration vs Economyclass price",
  xlab = "FlightDuration",
  ylab = "PriceEconomy"
)
abline(lm(PriceEconomy ~ FlightDuration, data = df), col = "blue")
# Horizontal box plot of the relative price column. The fill colour is passed
# as `col`, the argument boxplot() uses for the box body; `color` is not a
# boxplot argument and leaves the box in its default colour.
boxplot(
  df$PriceRelative,
  xlab = "Prices",
  ylab = "Price Relative",
  main = "Relative prices",
  horizontal = TRUE,
  col = "lightblue"
)
# Correlogram of df, columns kept in their original order: shaded cells below
# the diagonal, pie glyphs above it, and min/max values plus the variable
# name on the diagonal.
library(corrgram)
corrgram(
  df,
  order = FALSE,
  main = "Corrgram of plane.df intercorrelations",
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  text.panel = panel.txt,
  diag.panel = panel.minmax
)
## Hypothesis test ####
## Null hypothesis: there is no association between the number of premium
## seats and the price of premium seats.
# A paired t-test on these two columns would only compare the mean seat count
# with the mean price (different units), which says nothing about whether
# they are related. A correlation test addresses the stated hypothesis.
# NOTE(review): the recorded output that follows this call was produced by the
# earlier paired t-test and must be regenerated.
cor.test(df$SeatsPremium, df$PricePremium)
##
## Paired t-test
##
## data: df$SeatsPremium and df$PricePremium
## t = -30.164, df = 457, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1929.635 -1693.583
## sample estimates:
## mean of the differences
## -1811.609
# Linear model of economy fare on the seat-comfort differences, the share of
# premium seats, total seat count and flight duration; then print the
# coefficient table and fit statistics.
lmFit <- lm(
  formula = PriceEconomy ~ PitchDifference + WidthDifference +
    PercentPremiumSeats + SeatsTotal + FlightDuration,
  data = df
)
summary(lmFit)
##
## Call:
## lm(formula = PriceEconomy ~ PitchDifference + WidthDifference +
## PercentPremiumSeats + SeatsTotal + FlightDuration, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1738.0 -507.2 -165.7 461.0 1802.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 282.6872 241.9238 1.168 0.24322
## PitchDifference -105.7543 34.7489 -3.043 0.00248 **
## WidthDifference 124.9332 54.2737 2.302 0.02179 *
## PercentPremiumSeats 13.8679 8.7202 1.590 0.11246
## SeatsTotal 0.6640 0.4855 1.368 0.17210
## FlightDuration 156.7510 11.0095 14.238 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 809.5 on 452 degrees of freedom
## Multiple R-squared: 0.3364, Adjusted R-squared: 0.3291
## F-statistic: 45.83 on 5 and 452 DF, p-value: < 2.2e-16