# Load the airline pricing data set from the working directory.
data.df <- read.csv("AirlinePricingData.csv")
# View the data frame in R
# View(data.df)

# Numeric columns used for the correlation analysis, selected by name so the
# selection does not silently change if the CSV column order changes.
# (Previously selected by position: c(17:18, 8, 11:13).)
data_col <- data.df[, c(
  "Price", "AdvancedBookingDays", "FlyingMinutes",
  "Capacity", "SeatPitch", "SeatWidth"
)]
library(psych)

# Pairwise correlations between the selected columns, rounded to two decimals
# for display.
correlation_matrix <- cor(data_col)
round(correlation_matrix, 2)
# NOTE(review): the corr.test() output recorded further down was produced by
# corr.test(x = correlation_matrix, use = "complete"), i.e. on the 6 x 6
# correlation matrix itself (reported sample size 6), not on the data. For
# meaningful p-values rerun it as corr.test(data_col, use = "complete").
## Price AdvancedBookingDays FlyingMinutes Capacity
## Price 1.00 -0.01 -0.02 -0.03
## AdvancedBookingDays -0.01 1.00 0.01 -0.01
## FlyingMinutes -0.02 0.01 1.00 -0.32
## Capacity -0.03 -0.01 -0.32 1.00
## SeatPitch 0.07 -0.01 -0.03 0.51
## SeatWidth -0.06 0.05 -0.18 0.45
## SeatPitch SeatWidth
## Price 0.07 -0.06
## AdvancedBookingDays -0.01 0.05
## FlyingMinutes -0.03 -0.18
## Capacity 0.51 0.45
## SeatPitch 1.00 0.32
## SeatWidth 0.32 1.00
## Call:corr.test(x = correlation_matrix, use = "complete")
## Correlation matrix
## Price AdvancedBookingDays FlyingMinutes Capacity
## Price 1.00 -0.22 -0.09 -0.30
## AdvancedBookingDays -0.22 1.00 -0.08 -0.28
## FlyingMinutes -0.09 -0.08 1.00 -0.77
## Capacity -0.30 -0.28 -0.77 1.00
## SeatPitch -0.22 -0.41 -0.46 0.71
## SeatWidth -0.40 -0.19 -0.63 0.69
## SeatPitch SeatWidth
## Price -0.22 -0.40
## AdvancedBookingDays -0.41 -0.19
## FlyingMinutes -0.46 -0.63
## Capacity 0.71 0.69
## SeatPitch 1.00 0.44
## SeatWidth 0.44 1.00
## Sample Size
## [1] 6
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## Price AdvancedBookingDays FlyingMinutes Capacity
## Price 0.00 1.00 1.00 1.00
## AdvancedBookingDays 0.67 0.00 1.00 1.00
## FlyingMinutes 0.86 0.89 0.00 1.00
## Capacity 0.57 0.58 0.07 0.00
## SeatPitch 0.68 0.42 0.36 0.12
## SeatWidth 0.43 0.71 0.18 0.13
## SeatPitch SeatWidth
## Price 1.00 1
## AdvancedBookingDays 1.00 1
## FlyingMinutes 1.00 1
## Capacity 1.00 1
## SeatPitch 0.00 1
## SeatWidth 0.38 0
##
## To see confidence intervals of the correlations, print with the short=FALSE option
library(corrgram)
# Corrgram of the selected columns, with variables reordered (order = TRUE);
# panel.conf fills the lower triangle and panel.pie the upper triangle.
corrgram(
  data_col,
  order = TRUE,
  lower.panel = panel.conf,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of air intercorrelations"
)
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
# Correlation chart of the selected columns, with histograms enabled.
chart.Correlation(data_col, histogram = TRUE, pch = 19)

# Keep only flights departing from BOM and arriving at DEL.
data_subset <- subset(
  data.df,
  DepartureCityCode == "BOM" & ArrivalCityCode == "DEL"
)
# One-sample, one-sided t-test on that route:
# H0: mean Price = 5000  vs.  H1: mean Price > 5000.
t.test(data_subset$Price, mu = 5000, alternative = "greater")
##
## One Sample t-test
##
## data: data_subset$Price
## t = 6.0784, df = 129, p-value = 6.385e-09
## alternative hypothesis: true mean is greater than 5000
## 95 percent confidence interval:
## 5910.787 Inf
## sample estimates:
## mean of x
## 6252.054
Since the p-value (6.385e-09) is less than 0.05, we reject the null hypothesis and conclude that the mean flight price on the BOM to DEL route is greater than INR 5000.
# Welch two-sample, one-sided t-test of Price by Departure (AM vs. PM).
# Groups are ordered AM, PM, so "greater" means
# H1: mean(Price | AM) - mean(Price | PM) > 0.
t.test(
  data.df$Price ~ data.df$Departure,
  data = data.df,
  alternative = "greater"
)
##
## Welch Two Sample t-test
##
## data: data.df$Price by data.df$Departure
## t = 1.736, df = 296.58, p-value = 0.0418
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 22.71262 Inf
## sample estimates:
## mean in group AM mean in group PM
## 5598.893 5140.610
Since the p-value (0.0418) is less than 0.05, we reject the null hypothesis and conclude that the mean price of morning (AM) flights is greater than the mean price of afternoon (PM) flights.
# Welch two-sample, one-sided t-test of Price by IsDiwali.
# Groups are ordered 0 (non-Diwali), then 1 (Diwali), and the alternative
# refers to mean(group 0) - mean(group 1). The hypothesis of interest is that
# Diwali fares are HIGHER than non-Diwali fares, i.e. that this difference is
# negative, so the one-sided direction must be "less"; "greater" would test
# the opposite claim (non-Diwali fares higher than Diwali fares).
# NOTE(review): the output recorded after this call in the transcript was
# produced with alternative = "greater" (p-value = 0.9984); rerun to refresh.
t.test(
  data.df$Price ~ data.df$IsDiwali,
  data = data.df,
  alternative = "less"
)
##
## Welch Two Sample t-test
##
## data: data.df$Price by data.df$IsDiwali
## t = -2.9799, df = 244.52, p-value = 0.9984
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -1295.591 Inf
## sample estimates:
## mean in group 0 mean in group 1
## 5063.810 5897.479
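Since the p-value in the output above (0.9984) is greater than 0.05, that test does not reject its null hypothesis. However, the output above comes from a test with alternative = "greater", which for groups ordered 0, 1 asks whether non-Diwali prices exceed Diwali prices, so it does not show that Diwali prices are not higher. The sample means (5897.479 around Diwali vs. 5063.810 otherwise) and t = -2.9799 point the other way: for the correctly directed test (alternative = "less") the p-value is about 1 - 0.9984 = 0.0016, which is less than 0.05, so we reject the null hypothesis and conclude that the mean flight price around Diwali is greater than the non-Diwali price.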
# Keep only the two airlines being compared.
subset_data <- subset(data.df, Airline %in% c("IndiGo", "Air India"))
# Welch two-sample, one-sided t-test of Price by Airline.
# Groups are ordered Air India, IndiGo, so "greater" means
# H1: mean(Price | Air India) - mean(Price | IndiGo) > 0.
t.test(
  subset_data$Price ~ subset_data$Airline,
  data = subset_data,
  alternative = "greater"
)
##
## Welch Two Sample t-test
##
## data: subset_data$Price by subset_data$Airline
## t = 2.7205, df = 87.71, p-value = 0.00393
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 566.0833 Inf
## sample estimates:
## mean in group Air India mean in group IndiGo
## 6335.000 4879.525
Since the p-value (0.00393) is less than 0.05, we reject the null hypothesis and conclude that the mean flight price of Air India is greater than that of IndiGo.
# Simple linear regression of Price on AdvancedBookingDays over all flights,
# followed by the coefficient table and fit statistics.
reg <- lm(formula = Price ~ AdvancedBookingDays, data = data.df)
summary(reg)
##
## Call:
## lm(formula = Price ~ AdvancedBookingDays, data = data.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2786.5 -1320.8 -688.9 351.2 12594.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5422.959 224.497 24.16 <2e-16 ***
## AdvancedBookingDays -0.983 6.154 -0.16 0.873
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2392 on 303 degrees of freedom
## Multiple R-squared: 8.422e-05, Adjusted R-squared: -0.003216
## F-statistic: 0.02552 on 1 and 303 DF, p-value: 0.8732