R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the airline pricing data from the working directory.
airline_df <- read.csv("./AirlinePricingData.csv")

# Keep the six columns that are summarised and correlated below.
air_df <- airline_df[, c(
  "Price", "AdvancedBookingDays", "FlyingMinutes",
  "Capacity", "SeatPitch", "SeatWidth"
)]

# Descriptive statistics (psych::describe) for each selected column.
library(psych)
describe(air_df)
##                     vars   n    mean      sd median trimmed     mad  min
## Price                  1 305 5394.54 2388.29   4681 4984.80 1052.65 2607
## AdvancedBookingDays    2 305   28.90   22.30     30   28.50   34.10    2
## FlyingMinutes          3 305  136.03    4.71    135  135.80    7.41  125
## Capacity               4 305  176.36   32.39    180  172.19   14.83  138
## SeatPitch              5 305   30.26    0.93     30   30.17    0.00   29
## SeatWidth              6 305   17.41    0.49     17   17.38    0.00   17
##                       max range skew kurtosis     se
## Price               18015 15408 2.26     6.41 136.75
## AdvancedBookingDays    61    59 0.03    -1.68   1.28
## FlyingMinutes         145    20 0.28    -0.33   0.27
## Capacity              303   165 2.11     5.91   1.85
## SeatPitch              33     4 1.03     0.61   0.05
## SeatWidth              18     1 0.37    -1.86   0.03

Q.1.a Correlation matrix for the variables {“Price”, “AdvancedBookingDays”, “FlyingMinutes”, “Capacity”, “SeatPitch”, “SeatWidth”}

Types of Correlations

Pearson, Spearman and Kendall

Correlation (Pearson) among selected columns

# Pairwise Pearson product-moment correlations, stored in 'matrix'
matrix <- cor(air_df, method = "pearson")
# Show the coefficients rounded to 2 decimal places
round(matrix, digits = 2)
##                     Price AdvancedBookingDays FlyingMinutes Capacity
## Price                1.00               -0.01         -0.02    -0.03
## AdvancedBookingDays -0.01                1.00          0.01    -0.01
## FlyingMinutes       -0.02                0.01          1.00    -0.32
## Capacity            -0.03               -0.01         -0.32     1.00
## SeatPitch            0.07               -0.01         -0.03     0.51
## SeatWidth           -0.06                0.05         -0.18     0.45
##                     SeatPitch SeatWidth
## Price                    0.07     -0.06
## AdvancedBookingDays     -0.01      0.05
## FlyingMinutes           -0.03     -0.18
## Capacity                 0.51      0.45
## SeatPitch                1.00      0.32
## SeatWidth                0.32      1.00

Correlation (Spearman) among selected columns

# Pairwise Spearman rank correlation coefficients, stored in 'matrix2'
matrix2 <- cor(air_df, method = "spearman")
# Show the coefficients rounded to 2 decimal places
round(matrix2, digits = 2)
##                     Price AdvancedBookingDays FlyingMinutes Capacity
## Price                1.00               -0.09          0.02    -0.07
## AdvancedBookingDays -0.09                1.00          0.02    -0.01
## FlyingMinutes        0.02                0.02          1.00    -0.34
## Capacity            -0.07               -0.01         -0.34     1.00
## SeatPitch            0.15                0.00          0.02     0.22
## SeatWidth           -0.15                0.10         -0.18     0.54
##                     SeatPitch SeatWidth
## Price                    0.15     -0.15
## AdvancedBookingDays      0.00      0.10
## FlyingMinutes            0.02     -0.18
## Capacity                 0.22      0.54
## SeatPitch                1.00      0.32
## SeatWidth                0.32      1.00

Q.1.b Correlations (Pearson) with significance levels

The rcorr() function in the Hmisc package computes a matrix of correlation coefficients, together with the number of observations and the p-values (significance levels), for Pearson and Spearman correlations.

# Pearson correlations plus significance levels via Hmisc::rcorr,
# which is given the data as a matrix.
library(Hmisc)
air_mat <- rcorr(as.matrix(air_df), type = "pearson")
air_mat

Q.1.b Correlations (Spearman) with significance levels

The rcorr() function in the Hmisc package computes a matrix of correlation coefficients, together with the number of observations and the p-values (significance levels), for Pearson and Spearman correlations.

# Spearman correlations plus significance levels via Hmisc::rcorr,
# which is given the data as a matrix.
library(Hmisc)
air_mat2 <- rcorr(as.matrix(air_df), type = "spearman")
air_mat2

Q.1.c R code to visualize the correlation matrix

Q.1.e Corrgram

====================================================

## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# 2a: one-sample, one-sided t-test of H1: mean Price > 5000.
# The CSV is read once; 'abc' is a second name for the same data because
# parts 2b-2d subset 'abc' while 2a and Question 3 use 'z'.
z <- read.csv("./AirlinePricingData.csv")
abc <- z
t.test(z$Price, mu = 5000, alternative = "greater")
## 
##  One Sample t-test
## 
## data:  z$Price
## t = 2.8851, df = 304, p-value = 0.002096
## alternative hypothesis: true mean is greater than 5000
## 95 percent confidence interval:
##  5168.918      Inf
## sample estimates:
## mean of x 
##  5394.544
# 2b: Welch two-sample, one-sided t-test of
# H1: mean Price of AM departures > mean Price of PM departures.
am <- subset(abc, Departure == "AM")
pm <- subset(abc, Departure == "PM")
t.test(am$Price, pm$Price, alternative = "greater")
## 
##  Welch Two Sample t-test
## 
## data:  am$Price and pm$Price
## t = 1.736, df = 296.58, p-value = 0.0418
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  22.71262      Inf
## sample estimates:
## mean of x mean of y 
##  5598.893  5140.610
# 2c: Welch two-sample, one-sided t-test of
# H1: mean Price where IsDiwali == 1 > mean Price where IsDiwali == 0.
diw <- subset(abc, IsDiwali == 1)
nondiw <- subset(abc, IsDiwali == 0)
t.test(diw$Price, nondiw$Price, alternative = "greater")
## 
##  Welch Two Sample t-test
## 
## data:  diw$Price and nondiw$Price
## t = 2.9799, df = 244.52, p-value = 0.001587
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  371.7482      Inf
## sample estimates:
## mean of x mean of y 
##  5897.479  5063.810
# 2d: Welch two-sample, one-sided t-test of
# H1: mean Price for "Air India" > mean Price for "IndiGo".
ai <- subset(abc, Airline == "Air India")
ind <- subset(abc, Airline == "IndiGo")
t.test(ai$Price, ind$Price, alternative = "greater")
## 
##  Welch Two Sample t-test
## 
## data:  ai$Price and ind$Price
## t = 2.7205, df = 87.71, p-value = 0.00393
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  566.0833      Inf
## sample estimates:
## mean of x mean of y 
##  6335.000  4879.525
# Question 3: simple linear regression of Price on AdvancedBookingDays.
# The formula names the columns and 'data = z' supplies them, rather than
# embedding 'z$' in the formula; this keeps coefficient labels clean and lets
# predict(modl, newdata = ...) work. The fitted values are unchanged.
modl <- lm(Price ~ AdvancedBookingDays, data = z)
summary(modl)
## 
## Call:
## lm(formula = Price ~ AdvancedBookingDays, data = z)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2786.5 -1320.8  -688.9   351.2 12594.0 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         5422.959    224.497   24.16   <2e-16 ***
## AdvancedBookingDays   -0.983      6.154   -0.16    0.873    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2392 on 303 degrees of freedom
## Multiple R-squared:  8.422e-05,  Adjusted R-squared:  -0.003216 
## F-statistic: 0.02552 on 1 and 303 DF,  p-value: 0.8732