# Load the six-airline fare data set and take a first look at it.
# NOTE(review): setwd() ties the script to one machine's directory layout. It
# is kept so that any later relative paths keep resolving, but a
# project-relative path would be more portable.
setwd("~/SIP/SIP Phase 2/R Programming/Udemy Class Material/Week 3")
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 as well, so
# summary() still reports per-level counts as in the output recorded below.
airlines.df <- read.csv("SixAirlinesDataV2.csv", stringsAsFactors = TRUE)
# The spreadsheet viewer is only useful when someone is at the console.
if (interactive()) {
  View(airlines.df)
}
summary(airlines.df)
## Airline Aircraft FlightDuration TravelMonth
## AirFrance: 74 AirBus:151 Min. : 1.250 Aug:127
## British :175 Boeing:307 1st Qu.: 4.260 Jul: 75
## Delta : 46 Median : 7.790 Oct:127
## Jet : 61 Mean : 7.578 Sep:129
## Singapore: 40 3rd Qu.:10.620
## Virgin : 62 Max. :14.660
## IsInternational SeatsEconomy SeatsPremium PitchEconomy
## Domestic : 40 Min. : 78.0 Min. : 8.00 Min. :30.00
## International:418 1st Qu.:133.0 1st Qu.:21.00 1st Qu.:31.00
## Median :185.0 Median :36.00 Median :31.00
## Mean :202.3 Mean :33.65 Mean :31.22
## 3rd Qu.:243.0 3rd Qu.:40.00 3rd Qu.:32.00
## Max. :389.0 Max. :66.00 Max. :33.00
## PitchPremium WidthEconomy WidthPremium PriceEconomy
## Min. :34.00 Min. :17.00 Min. :17.00 Min. : 65
## 1st Qu.:38.00 1st Qu.:18.00 1st Qu.:19.00 1st Qu.: 413
## Median :38.00 Median :18.00 Median :19.00 Median :1242
## Mean :37.91 Mean :17.84 Mean :19.47 Mean :1327
## 3rd Qu.:38.00 3rd Qu.:18.00 3rd Qu.:21.00 3rd Qu.:1909
## Max. :40.00 Max. :19.00 Max. :21.00 Max. :3593
## PricePremium PriceRelative SeatsTotal PitchDifference
## Min. : 86.0 Min. :0.0200 Min. : 98 Min. : 2.000
## 1st Qu.: 528.8 1st Qu.:0.1000 1st Qu.:166 1st Qu.: 6.000
## Median :1737.0 Median :0.3650 Median :227 Median : 7.000
## Mean :1845.3 Mean :0.4872 Mean :236 Mean : 6.688
## 3rd Qu.:2989.0 3rd Qu.:0.7400 3rd Qu.:279 3rd Qu.: 7.000
## Max. :7414.0 Max. :1.8900 Max. :441 Max. :10.000
## WidthDifference PercentPremiumSeats
## Min. :0.000 Min. : 4.71
## 1st Qu.:1.000 1st Qu.:12.28
## Median :1.000 Median :13.21
## Mean :1.633 Mean :14.65
## 3rd Qu.:3.000 3rd Qu.:15.36
## Max. :4.000 Max. :24.69
# One data frame per carrier. which() drops rows whose Airline is NA, so the
# selected rows are the same ones subset() would return.
British <- airlines.df[which(airlines.df$Airline == "British"), ]
Virgin <- airlines.df[which(airlines.df$Airline == "Virgin"), ]
Delta <- airlines.df[which(airlines.df$Airline == "Delta"), ]
AirFrance <- airlines.df[which(airlines.df$Airline == "AirFrance"), ]
Jet <- airlines.df[which(airlines.df$Airline == "Jet"), ]
Singapore <- airlines.df[which(airlines.df$Airline == "Singapore"), ]
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Pairwise correlation matrix (cor() default method, Pearson) for flight
# duration, the two fares, the pitch/width differences and the share of
# premium seats. Columns are picked by name instead of position so the call
# keeps selecting the same variables if the CSV column order ever changes.
cor(airlines.df[, c("FlightDuration", "PriceEconomy", "PricePremium",
                    "PitchDifference", "WidthDifference",
                    "PercentPremiumSeats")])
## FlightDuration PriceEconomy PricePremium
## FlightDuration 1.00000000 0.56664039 0.64873981
## PriceEconomy 0.56664039 1.00000000 0.90138870
## PricePremium 0.64873981 0.90138870 1.00000000
## PitchDifference -0.03749288 -0.09952511 -0.01806629
## WidthDifference -0.11856070 -0.08449975 -0.01151218
## PercentPremiumSeats 0.06051625 0.06532232 0.11639097
## PitchDifference WidthDifference PercentPremiumSeats
## FlightDuration -0.03749288 -0.11856070 0.06051625
## PriceEconomy -0.09952511 -0.08449975 0.06532232
## PricePremium -0.01806629 -0.01151218 0.11639097
## PitchDifference 1.00000000 0.76089108 -0.09264869
## WidthDifference 0.76089108 1.00000000 -0.27559416
## PercentPremiumSeats -0.09264869 -0.27559416 1.00000000
# Correlogram for airlines.df: shaded cells below the diagonal, pie glyphs
# above it, and variable names on the diagonal. order = TRUE asks corrgram to
# reorder the variables rather than keep the data-frame column order.
corrgram(
  airlines.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of airlines intercorrelations"
)
# From the correlogram it is evident that there is a positive correlation
# between the price of a premium-class seat and FlightDuration, SeatsPremium,
# PriceEconomy, WidthEconomy, PitchEconomy, SeatsTotal and SeatsEconomy.
# Test the linear association between premium fare and flight duration.
cor.test(
  x = airlines.df$PricePremium,
  y = airlines.df$FlightDuration,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PricePremium and airlines.df$FlightDuration
## t = 18.204, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5923218 0.6988270
## sample estimates:
## cor
## 0.6487398
# Test the linear association between premium fare and total seat count.
cor.test(
  x = airlines.df$PricePremium,
  y = airlines.df$SeatsTotal,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PricePremium and airlines.df$SeatsTotal
## t = 4.1851, df = 456, p-value = 3.421e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1025049 0.2790349
## sample estimates:
## cor
## 0.1923253
# Test the linear association between premium fare and economy seat count.
cor.test(
  x = airlines.df$PricePremium,
  y = airlines.df$SeatsEconomy,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PricePremium and airlines.df$SeatsEconomy
## t = 3.8403, df = 456, p-value = 0.0001402
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.08678154 0.26434066
## sample estimates:
## cor
## 0.1770009
# Test the linear association between premium fare and premium seat count.
cor.test(
  x = airlines.df$PricePremium,
  y = airlines.df$SeatsPremium,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PricePremium and airlines.df$SeatsPremium
## t = 4.761, df = 456, p-value = 2.591e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1285487 0.3031938
## sample estimates:
## cor
## 0.2176124
# Test the linear association between premium fare and economy seat pitch.
cor.test(
  x = airlines.df$PricePremium,
  y = airlines.df$PitchEconomy,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PricePremium and airlines.df$PitchEconomy
## t = 4.9575, df = 456, p-value = 1.009e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1373612 0.3113179
## sample estimates:
## cor
## 0.2261418
# Test the linear association between premium fare and economy seat width.
cor.test(
  x = airlines.df$PricePremium,
  y = airlines.df$WidthEconomy,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PricePremium and airlines.df$WidthEconomy
## t = 3.2519, df = 456, p-value = 0.001231
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.0597457 0.2388800
## sample estimates:
## cor
## 0.1505484
# Test the linear association between premium fare and economy fare.
cor.test(
  x = airlines.df$PricePremium,
  y = airlines.df$PriceEconomy,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PricePremium and airlines.df$PriceEconomy
## t = 44.452, df = 456, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8826622 0.9172579
## sample estimates:
## cor
## 0.9013887
# Test the linear association between premium fare and the percentage of
# premium seats.
cor.test(
  x = airlines.df$PricePremium,
  y = airlines.df$PercentPremiumSeats,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: airlines.df$PricePremium and airlines.df$PercentPremiumSeats
## t = 2.5024, df = 456, p-value = 0.01268
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.0250311 0.2058228
## sample estimates:
## cor
## 0.116391