# Load the airline pricing data set from the current working directory.
air <- read.csv("AirlinePricingData.csv")
# View(air)
# library(psych)
# describe(air)

# Numeric variables used in the correlation analysis. They are selected by
# name instead of by position (previously columns 17:18, 8, 11:13) so the
# selection keeps working if the column order of the CSV changes.
var1 <- air[, c(
  "Price", "AdvancedBookingDays", "FlyingMinutes",
  "Capacity", "SeatPitch", "SeatWidth"
)]
library(psych)

# Pearson correlation matrix of the selected variables, shown to two decimals.
# use = "complete.obs" drops rows with missing values, matching the
# corr.test(var1, use = "complete") call made on the same data further down.
matrix <- cor(var1, use = "complete.obs")
round(matrix, 2)
## Price AdvancedBookingDays FlyingMinutes Capacity
## Price 1.00 -0.01 -0.02 -0.03
## AdvancedBookingDays -0.01 1.00 0.01 -0.01
## FlyingMinutes -0.02 0.01 1.00 -0.32
## Capacity -0.03 -0.01 -0.32 1.00
## SeatPitch 0.07 -0.01 -0.03 0.51
## SeatWidth -0.06 0.05 -0.18 0.45
## SeatPitch SeatWidth
## Price 0.07 -0.06
## AdvancedBookingDays -0.01 0.05
## FlyingMinutes -0.03 -0.18
## Capacity 0.51 0.45
## SeatPitch 1.00 0.32
## SeatWidth 0.32 1.00
## Call:corr.test(x = var1, use = "complete")
## Correlation matrix
## Price AdvancedBookingDays FlyingMinutes Capacity
## Price 1.00 -0.01 -0.02 -0.03
## AdvancedBookingDays -0.01 1.00 0.01 -0.01
## FlyingMinutes -0.02 0.01 1.00 -0.32
## Capacity -0.03 -0.01 -0.32 1.00
## SeatPitch 0.07 -0.01 -0.03 0.51
## SeatWidth -0.06 0.05 -0.18 0.45
## SeatPitch SeatWidth
## Price 0.07 -0.06
## AdvancedBookingDays -0.01 0.05
## FlyingMinutes -0.03 -0.18
## Capacity 0.51 0.45
## SeatPitch 1.00 0.32
## SeatWidth 0.32 1.00
## Sample Size
## [1] 305
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## Price AdvancedBookingDays FlyingMinutes Capacity
## Price 0.00 1.00 1.00 1
## AdvancedBookingDays 0.87 0.00 1.00 1
## FlyingMinutes 0.75 0.93 0.00 0
## Capacity 0.65 0.88 0.00 0
## SeatPitch 0.19 0.81 0.55 0
## SeatWidth 0.30 0.34 0.00 0
## SeatPitch SeatWidth
## Price 1 1.00
## AdvancedBookingDays 1 1.00
## FlyingMinutes 1 0.01
## Capacity 0 0.00
## SeatPitch 0 0.00
## SeatWidth 0 0.00
##
## To see confidence intervals of the correlations, print with the short=FALSE option
# Correlations between the selected variables together with their p-values
# (the printed report is the "Call:corr.test(...)" output recorded above).
library(psych)
corr.test(x = var1, use = "complete")
library(corrgram)
# Correlogram of the selected variables: panel.conf in the lower triangle,
# panel.pie in the upper triangle and the variable names on the diagonal.
corrgram(
  var1,
  order = TRUE,
  lower.panel = panel.conf,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of air intercorrelations"
)
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
# Correlation chart of the selected variables, with histograms switched on.
chart.Correlation(R = var1, histogram = TRUE)
# Flights from Mumbai (BOM) to Delhi (DEL) only.
bom_to_del <- subset(
  air,
  DepartureCityCode == "BOM" & ArrivalCityCode == "DEL"
)
# One-sample t-test of the mean BOM-DEL price against INR 5000
# (two-sided, the t.test() default).
t.test(bom_to_del$Price, mu = 5000)
##
## One Sample t-test
##
## data: bom_to_del$Price
## t = 6.0784, df = 129, p-value = 1.277e-08
## alternative hypothesis: true mean is not equal to 5000
## 95 percent confidence interval:
## 5844.506 6659.601
## sample estimates:
## mean of x
## 6252.054
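# Since the p-value (1.277e-08) is less than 0.05, we reject the null hypothesis that the mean BOM-DEL flight price equals INR 5000. The test is two-sided, but the sample mean (6252) and the entire 95% confidence interval (5845 to 6660) lie above 5000, so we conclude that the mean flight price is more than INR 5000.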
# One-sided Welch t-test of Price by Departure slot. The recorded output lists
# group AM first, so "greater" tests whether the AM mean exceeds the PM mean.
# Both formula terms already reference `air` explicitly, so no data argument
# is needed.
t.test(
  air$Price ~ air$Departure,
  alternative = "greater"
)
##
## Welch Two Sample t-test
##
## data: air$Price by air$Departure
## t = 1.736, df = 296.58, p-value = 0.0418
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 22.71262 Inf
## sample estimates:
## mean in group AM mean in group PM
## 5598.893 5140.610
# Since the p-value (0.0418) is less than 0.05, we reject the null hypothesis and accept the alternative hypothesis that the mean price of morning (AM) flights is higher than the mean price of afternoon (PM) flights.
# Hypothesis: flights around Diwali are more expensive than other flights.
#   H0: mean price (IsDiwali = 1) <= mean price (IsDiwali = 0)
#   H1: mean price (IsDiwali = 1) >  mean price (IsDiwali = 0)
# With a formula, t.test() tests (first group - second group), i.e.
# (group 0 - group 1) here. "Diwali is more expensive" is therefore the LOWER
# tail and needs alternative = "less". The previous alternative = "greater"
# tested the opposite claim (non-Diwali more expensive), which is why it
# reported p = 0.9984 despite the Diwali mean being clearly higher.
t.test(air$Price ~ air$IsDiwali, data = air, alternative = "less")
# Expected output, derived from the earlier run (same t, df and group means;
# p = 1 - 0.9984; confidence bound mirrored around the mean difference).
# Re-run to confirm the exact figures.
##
## Welch Two Sample t-test
##
## data: air$Price by air$IsDiwali
## t = -2.9799, df = 244.52, p-value = 0.0016
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -371.747
## sample estimates:
## mean in group 0 mean in group 1
## 5063.810 5897.479
# Since the p-value (about 0.0016) is less than 0.05, we reject the null hypothesis and accept the alternative hypothesis that the mean flight price around Diwali is higher than the mean flight price on non-Diwali dates.
# Keep only the two airlines being compared.
subset_data <- subset(air, Airline %in% c("IndiGo", "Air India"))
# One-sided Welch t-test of Price by Airline. The recorded output lists
# Air India first, so "greater" tests whether the Air India mean exceeds the
# IndiGo mean. The formula terms reference `subset_data` directly.
t.test(
  subset_data$Price ~ subset_data$Airline,
  alternative = "greater"
)
##
## Welch Two Sample t-test
##
## data: subset_data$Price by subset_data$Airline
## t = 2.7205, df = 87.71, p-value = 0.00393
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 566.0833 Inf
## sample estimates:
## mean in group Air India mean in group IndiGo
## 6335.000 4879.525
# Since the p-value (0.00393) is less than 0.05, we reject the null hypothesis and accept the alternative hypothesis that the mean flight price of Air India is higher than that of IndiGo.
# Simple linear regression of ticket price on advance booking days:
#   Y = airline ticket price (dependent variable)
#   X = advance booking days (independent variable)
#   Model: Y = b0 + b1*X + e
# Ordinary least squares fit of Price on AdvancedBookingDays.
reg <- lm(
  formula = Price ~ AdvancedBookingDays,
  data = air
)
# Coefficient table, residual summary and fit statistics.
print(summary(reg))
##
## Call:
## lm(formula = Price ~ AdvancedBookingDays, data = air)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2786.5 -1320.8 -688.9 351.2 12594.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5422.959 224.497 24.16 <2e-16 ***
## AdvancedBookingDays -0.983 6.154 -0.16 0.873
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2392 on 303 degrees of freedom
## Multiple R-squared: 8.422e-05, Adjusted R-squared: -0.003216
## F-statistic: 0.02552 on 1 and 303 DF, p-value: 0.8732
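# Therefore, the fitted regression model is: Airline ticket price = 5422.959 - 0.983 * Advance booking days. Note that the slope is not statistically significant (p = 0.873) and R-squared is about 0.00008, so advance booking days explain essentially none of the variation in ticket price.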