#************************************************************************

# Q1a. Write R code to generate the correlation matrix for the given continuous variables {"Price", "AdvancedBookingDays", "FlyingMinutes", "Capacity", "SeatPitch", "SeatWidth"}

# Pairwise correlations between the six continuous variables of price1
matrix1a <- cor(
  price1[c(
    "Price", "AdvancedBookingDays", "FlyingMinutes",
    "Capacity", "SeatPitch", "SeatWidth"
  )]
)
# Print the matrix rounded to two decimal places
round(matrix1a, digits = 2)
##                     Price AdvancedBookingDays FlyingMinutes Capacity
## Price                1.00               -0.01         -0.02    -0.03
## AdvancedBookingDays -0.01                1.00          0.01    -0.01
## FlyingMinutes       -0.02                0.01          1.00    -0.32
## Capacity            -0.03               -0.01         -0.32     1.00
## SeatPitch            0.07               -0.01         -0.03     0.51
## SeatWidth           -0.06                0.05         -0.18     0.45
##                     SeatPitch SeatWidth
## Price                    0.07     -0.06
## AdvancedBookingDays     -0.01      0.05
## FlyingMinutes           -0.03     -0.18
## Capacity                 0.51      0.45
## SeatPitch                1.00      0.32
## SeatWidth                0.32      1.00

#************************************************************************

#Q1b. Write R code to generate the correlation matrix, along with their significance values, for the given continuous variables {"Price", "AdvancedBookingDays", "FlyingMinutes", "Capacity", "SeatPitch", "SeatWidth"}

library(Hmisc)
# Keep only the six continuous variables; price2 is reused by later sections
price2 <- price1[c(
  "Price", "AdvancedBookingDays", "FlyingMinutes",
  "Capacity", "SeatPitch", "SeatWidth"
)]
# Pearson correlation matrix together with its significance values
rcorr(as.matrix(price2), type = "pearson")

#************************************************************************

#Q1c. Write R code to visualize the correlation matrix in Q1b.
library(corrgram)
# Same column selection as in Q1b (the six continuous variables)
price2 <- price1[c(
  "Price", "AdvancedBookingDays", "FlyingMinutes",
  "Capacity", "SeatPitch", "SeatWidth"
)]
# Draw the corrgram; the panel functions for the lower triangle, upper
# triangle and text cells are chosen explicitly, and order = TRUE is passed
# so corrgram reorders the variables itself.
corrgram(
  as.matrix(price2),
  order = TRUE,
  upper.panel = panel.pie,
  lower.panel = panel.conf,
  text.panel = panel.txt,
  main = "Visualization of correlation"
)

#************************************************************************

#Q1d. Write R code to generate the corrgram using PerformanceAnalytics package
library("PerformanceAnalytics")
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
# Correlation chart of the continuous variables selected into price2,
# with histograms enabled and solid plotting symbols (pch = 19)
chart.Correlation(
  R = price2,
  histogram = TRUE,
  pch = 19
)

#Q2a. Test whether the ticket prices of Mumbai to Delhi flights are more than INR 5000.
# H0: BOM - DEL flights ticket price is equal to INR 5000
# Alternate Hypothesis: BOM - DEL flights ticket price is more than INR 5000
# Put the columns of price1 on the search path. Calls further down that name
# columns directly (without `data =` or `price1$`) depend on this.
attach(what = price1)
# Preview the first six rows to see which columns are available
head(price1, n = 6L)
##   FlightNumber   Airline DepartureCityCode ArrivalCityCode DepartureTime
## 1       9W 313       Jet               DEL             BOM           225
## 2       9W 339       Jet               BOM             DEL           300
## 3       SG 161 Spice Jet               DEL             BOM           350
## 4       6E 171    IndiGo               DEL             BOM           455
## 5       SG 160 Spice Jet               BOM             DEL           555
## 6       9W 762       Jet               BOM             DEL           605
##   ArrivalTime Departure FlyingMinutes Aircraft PlaneModel Capacity
## 1         435        AM           130   Boeing        738      156
## 2         505        AM           125   Boeing        738      156
## 3         605        AM           135   Boeing        738      189
## 4         710        AM           135   Airbus       A320      180
## 5         805        AM           130   Boeing        738      189
## 6         815        AM           130   Boeing        738      156
##   SeatPitch SeatWidth DataCollectionDate DateDeparture IsWeekend Price
## 1        30        17        Sep 13 2018    Nov 6 2018        No  4051
## 2        30        17        Sep 15 2018    Nov 6 2018        No 11587
## 3        29        17        Sep 19 2018    Nov 6 2018        No  3977
## 4        30        18         Sep 8 2018    Nov 6 2018        No  4234
## 5        29        17        Sep 19 2018    Nov 6 2018        No  6837
## 6        30        17        Sep 15 2018    Nov 6 2018        No  6518
##   AdvancedBookingDays IsDiwali DayBeforeDiwali DayAfterDiwali
## 1                  54        1               1              0
## 2                  52        1               1              0
## 3                  48        1               1              0
## 4                  59        1               1              0
## 5                  48        1               1              0
## 6                  52        1               1              0
##   MetroDeparture MetroArrival MarketShare LoadFactor
## 1              1            1        15.4      83.32
## 2              1            1        15.4      83.32
## 3              1            1        13.2      94.06
## 4              1            1        39.6      87.20
## 5              1            1        13.2      94.06
## 6              1            1        15.4      83.32
# Prices of flights departing from BOM and arriving at DEL; which() keeps only
# the rows where the condition is TRUE
price_Mum_Del_data <- with(
  price1,
  Price[which(DepartureCityCode == "BOM" & ArrivalCityCode == "DEL")]
)
# One-sample, one-tailed t-test: is the mean BOM -> DEL price above INR 5000?
t.test(price_Mum_Del_data, alternative = "greater", mu = 5000)
## 
##  One Sample t-test
## 
## data:  price_Mum_Del_data
## t = 6.0784, df = 129, p-value = 6.385e-09
## alternative hypothesis: true mean is greater than 5000
## 95 percent confidence interval:
##  5910.787      Inf
## sample estimates:
## mean of x 
##  6252.054
# Result of one-sample t-test
# Based on the above output of the t-test, we can REJECT the null hypothesis, as the p-value (6.385e-09) is less than 0.001. The t-test shows that the mean ticket price of Mumbai (BOM) to Delhi (DEL) flights (6252.054) is significantly greater than INR 5000. 

#************************************************************************

# Q2b. Test whether the ticket prices of morning flights are greater than the afternoon flights
# H0: Ticket price for morning flights are equal to Ticket price for afternoon flights
# Alternate Hypothesis : The ticket prices of morning flights are greater than the afternoon flights
# Generate descriptive statistics for each group
aggregate(Price ~ Departure, data = price1, FUN = mean)
##   Departure    Price
## 1        AM 5598.893
## 2        PM 5140.610
# Box plot of ticket prices for each departure period.
# `data = price1` is passed explicitly so the formula is resolved inside the
# data frame instead of relying on price1 having been attach()ed earlier.
boxplot(Price ~ Departure, data = price1, main = "Average Ticket Prices",
        xlab = "Departure Time", col = c("green", "blue"),
        ylab = "Average Ticket Price")

# One-tailed two-sample t-test on Price by Departure, with equal variances
# assumed (var.equal = TRUE). With the groups ordered AM, PM (see the output
# below), alternative = "greater" tests whether the AM mean exceeds the PM mean.
t.test(Price ~ Departure, data = price1, var.equal = TRUE,
       alternative = "greater")
## 
##  Two Sample t-test
## 
## data:  Price by Departure
## t = 1.6706, df = 303, p-value = 0.04791
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  5.691872      Inf
## sample estimates:
## mean in group AM mean in group PM 
##         5598.893         5140.610
# Result of independent two-sample t-test
# Based on the above output of the t-test, the p-value (0.04791) is less than 0.05, hence we reject the null hypothesis at the 5% significance level: the ticket prices of morning flights are significantly greater than those of afternoon flights. (The p-value is greater than 0.001, so the result would not be significant at that much stricter level.) 

#************************************************************************

# Q2c. Test whether the ticket prices around Diwali is more compared to non-Diwali ticket prices
# H0: Ticket Prices during Diwali is equal to non-Diwali ticket prices 
# Alternate Hypothesis: Ticket Prices during Diwali is more compared to non-Diwali ticket prices 
# Generate descriptive statistics for each group
aggregate(Price ~ IsDiwali, data = price1, FUN = mean)
##   IsDiwali    Price
## 1        0 5063.810
## 2        1 5897.479
# Box plot of ticket prices for non-Diwali (0) and Diwali (1) observations.
# `data = price1` is passed explicitly so the formula is resolved inside the
# data frame instead of relying on price1 having been attach()ed earlier.
boxplot(Price ~ IsDiwali, data = price1, main = "Average Ticket Prices",
        xlab = "Diwali Time", col = c("green", "blue"),
        ylab = "Average Ticket Price")

# One-tailed two-sample t-test on Price by IsDiwali, with equal variances
# assumed (var.equal = TRUE). The groups are ordered 0, 1 (see the output
# below), so alternative = "less" tests whether the non-Diwali mean is below
# the Diwali mean, i.e. whether Diwali prices are higher.
t.test(Price ~ IsDiwali, data = price1, var.equal = TRUE,
       alternative = "less")
## 
##  Two Sample t-test
## 
## data:  Price by IsDiwali
## t = -3.022, df = 303, p-value = 0.001363
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf -378.5134
## sample estimates:
## mean in group 0 mean in group 1 
##        5063.810        5897.479
#t.test(airlineData.df[airlineData.df$IsDiwali==1,]$Price, airlineData.df[airlineData.df$IsDiwali==0,]$Price, var.equal = TRUE, alternative="greater")
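# Result of independent two-sample t-test
# Based on the output of the t-test, the p-value (0.001363) is less than 0.05, hence we reject the null hypothesis at the 5% significance level: ticket prices around Diwali are significantly higher than non-Diwali ticket prices. (The p-value is slightly greater than 0.001, so the result would narrowly miss significance at that much stricter level.) 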

#************************************************************************

# Q2d. Test whether the ticket prices on Air India flights are greater than IndiGo flights
# H0: Ticket Prices (Air India) = Ticket Prices (Indigo)
# H1: Ticket Prices (Air India) > Ticket Prices (Indigo)
# Generate descriptive statistics for each group
aggregate(Price ~ Airline, data = price1, FUN = mean)
##     Airline    Price
## 1 Air India 6335.000
## 2    IndiGo 4879.525
## 3       Jet 5496.146
## 4 Spice Jet 5094.850
# Box plot of ticket prices for every airline.
# `data = price1` is passed explicitly so the formula is resolved inside the
# data frame instead of relying on price1 having been attach()ed earlier.
# Only two colours are supplied, so they are recycled across the airlines.
boxplot(Price ~ Airline, data = price1, main = "Average Ticket Prices",
        xlab = "Airline", col = c("green", "blue"),
        ylab = "Average Ticket Price")

# Extract the prices of the two airlines being compared
AI_data <- price1$Price[price1$Airline == "Air India"]
Indigo_data <- price1$Price[price1$Airline == "IndiGo"]
# Number of observations 
length(AI_data)
## [1] 41
length(Indigo_data)
## [1] 80
# One-tailed, unpaired two-sample t-test with equal variances assumed:
# is the mean Air India price greater than the mean IndiGo price?
t.test(AI_data, Indigo_data, paired = FALSE, var.equal = TRUE,
       alternative = "greater")
## 
##  Two Sample t-test
## 
## data:  AI_data and Indigo_data
## t = 2.6396, df = 119, p-value = 0.004705
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  541.4039      Inf
## sample estimates:
## mean of x mean of y 
##  6335.000  4879.525
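# Result of independent two-sample t-test
# Based on the above output of the t-test, the p-value (0.004705) is less than 0.05, hence we reject the null hypothesis at the 5% significance level: ticket prices on Air India flights are significantly greater than on IndiGo flights. (The p-value is greater than 0.001, so the result would not be significant at that much stricter level.) 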

#************************************************************************

# QUESTION 3
# Q3a. Run a simple linear regression of airline ticket Price on the Advanced Booking Days. Write R code to output the summary of the model
# Independent variable (say x) 
# Independent variable (x): number of days the ticket was booked in advance
xAdvBookDays <- price1$AdvancedBookingDays
# Dependent Variable (say y)
yPrice <- price1$Price
# Plot Advance Booking Days vs Price Graph
plot(xAdvBookDays, yPrice, xlab = "Advance Booking Days", ylab = "Price (INR)")

# Second scatter plot of the same data with a smooth curve added
scatter.smooth(xAdvBookDays, yPrice)
# Simple Linear Regression Model
# The formula uses the two vectors defined above, so the coefficient is
# labelled `xAdvBookDays` in every output that follows.
fit <- lm(yPrice ~ xAdvBookDays, data = price1)
# Beta Coefficients
coef(fit)
##  (Intercept) xAdvBookDays 
## 5422.9589348   -0.9830394
#***************** Model: price = 5422.959 - 0.983 * Advance Booking Days ****************
# Overlay the fitted regression line on the current plot. The line is taken
# from the model object rather than from coefficients typed in by hand, so it
# stays correct if the data (and therefore the fit) change.
abline(fit, col = 3)

# 95% confidence intervals for the intercept and the slope
confint(object = fit, level = 0.95)
##                   2.5 %     97.5 %
## (Intercept)  4981.18757 5864.73030
## xAdvBookDays  -13.09224   11.12616
# Fitted (predicted) price for every observation used in the model
fitted(object = fit)
##        1        2        3        4        5        6        7        8 
## 5369.875 5371.841 5375.773 5364.960 5375.773 5371.841 5375.773 5364.960 
##        9       10       11       12       13       14       15       16 
## 5375.773 5371.841 5369.875 5375.773 5364.960 5371.841 5375.773 5375.773 
##       17       18       19       20       21       22       23       24 
## 5371.841 5369.875 5375.773 5364.960 5371.841 5369.875 5373.807 5375.773 
##       25       26       27       28       29       30       31       32 
## 5364.960 5375.773 5375.773 5369.875 5373.807 5375.773 5364.960 5373.807 
##       33       34       35       36       37       38       39       40 
## 5364.960 5369.875 5375.773 5370.858 5373.807 5364.960 5366.926 5370.858 
##       41       42       43       44       45       46       47       48 
## 5373.807 5375.773 5366.926 5366.926 5370.858 5373.807 5370.858 5366.926 
##       49       50       51       52       53       54       55       56 
## 5375.773 5370.858 5366.926 5370.858 5366.926 5371.841 5375.773 5366.926 
##       57       58       59       60       61       62       63       64 
## 5371.841 5366.926 5371.841 5367.909 5369.875 5373.807 5362.994 5373.807 
##       65       66       67       68       69       70       71       72 
## 5369.875 5373.807 5362.994 5373.807 5369.875 5367.909 5373.807 5362.994 
##       73       74       75       76       77       78       79       80 
## 5369.875 5373.807 5373.807 5369.875 5367.909 5373.807 5362.994 5369.875 
##       81       82       83       84       85       86       87       88 
## 5367.909 5371.841 5373.807 5362.994 5373.807 5373.807 5367.909 5371.841 
##       89       90       91       92       93       94       95       96 
## 5373.807 5362.994 5371.841 5362.994 5367.909 5373.807 5368.892 5371.841 
##       97       98       99      100      101      102      103      104 
## 5362.994 5364.960 5368.892 5371.841 5373.807 5364.960 5364.960 5368.892 
##      105      106      107      108      109      110      111      112 
## 5371.841 5368.892 5364.960 5371.841 5373.807 5368.892 5364.960 5368.892 
##      113      114      115      116      117      118      119      120 
## 5364.960 5371.841 5369.875 5373.807 5364.960 5369.875 5364.960 5371.841 
##      121      122      123      124      125      126      127      128 
## 5369.875 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 
##      129      130      131      132      133      134      135      136 
## 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 
##      137      138      139      140      141      142      143      144 
## 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 
##      145      146      147      148      149      150      151      152 
## 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 
##      153      154      155      156      157      158      159      160 
## 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 
##      161      162      163      164      165      166      167      168 
## 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 
##      169      170      171      172      173      174      175      176 
## 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 5393.468 
##      177      178      179      180      181      182      183      184 
## 5393.468 5393.468 5393.468 5393.468 5420.993 5420.993 5420.993 5420.993 
##      185      186      187      188      189      190      191      192 
## 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 
##      193      194      195      196      197      198      199      200 
## 5420.993 5420.993 5420.993 5420.993 5420.993 5416.078 5416.078 5420.993 
##      201      202      203      204      205      206      207      208 
## 5416.078 5420.993 5416.078 5420.993 5416.078 5420.993 5416.078 5416.078 
##      209      210      211      212      213      214      215      216 
## 5420.993 5416.078 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 
##      217      218      219      220      221      222      223      224 
## 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 5416.078 5416.078 
##      225      226      227      228      229      230      231      232 
## 5416.078 5416.078 5416.078 5416.078 5420.993 5416.078 5420.993 5416.078 
##      233      234      235      236      237      238      239      240 
## 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 
##      241      242      243      244      245      246      247      248 
## 5420.993 5420.993 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 
##      249      250      251      252      253      254      255      256 
## 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 5420.993 
##      257      258      259      260      261      262      263      264 
## 5420.993 5420.993 5420.993 5420.993 5420.993 5416.078 5416.078 5420.993 
##      265      266      267      268      269      270      271      272 
## 5416.078 5416.078 5420.993 5416.078 5416.078 5420.993 5420.993 5416.078 
##      273      274      275      276      277      278      279      280 
## 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 
##      281      282      283      284      285      286      287      288 
## 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 
##      289      290      291      292      293      294      295      296 
## 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 
##      297      298      299      300      301      302      303      304 
## 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 5416.078 
##      305 
## 5416.078
# Residual (observed minus fitted price) for every observation
residuals(object = fit)
##            1            2            3            4            5 
## -1318.874809  6215.159113 -1398.773045 -1130.959612  1461.226955 
##            6            7            8            9           10 
##  1146.159113 -2186.773045 -1130.959612  3247.226955  1461.159113 
##           11           12           13           14           15 
## -2058.874809  2301.226955  -211.959612  4506.159113 -1398.773045 
##           16           17           18           19           20 
##  1189.226955  4506.159113  -373.874809  7069.226955  -211.959612 
##           21           22           23           24           25 
##  4506.159113 -2215.874809  4504.193034  7069.226955  -211.959612 
##           26           27           28           29           30 
## -1398.773045  3247.226955 -1318.874809  4504.193034  8381.226955 
##           31           32           33           34           35 
## -1130.959612  4504.193034 -1130.959612 -1318.874809  8381.226955 
##           36           37           38           39           40 
## -1319.857848  6213.193034 -1130.959612 -1132.925691     2.142152 
##           41           42           43           44           45 
##  3664.193034  1189.226955 -1132.925691 -1132.925691 -1319.857848 
##           46           47           48           49           50 
##  4504.193034 -1319.857848  -213.925691  8199.226955 -1319.857848 
##           51           52           53           54           55 
##  -213.925691 -1319.857848 -1376.925691 -1320.840887  1189.226955 
##           56           57           58           59           60 
## -2759.925691 -1320.840887 -2759.925691 -1320.840887  -686.908730 
##           61           62           63           64           65 
##   308.125191 -1396.806966 -1128.993533  2303.193034  3668.125191 
##           66           67           68           69           70 
## -1396.806966 -1128.993533  1359.193034    -6.874809    48.091270 
##           71           72           73           74           75 
##  1359.193034  -209.993533   518.125191 -1396.806966   351.193034 
##           76           77           78           79           80 
##    -6.874809  3540.091270   351.193034  -209.993533    -6.874809 
##           81           82           83           84           85 
##    48.091270  3666.159113   351.193034  -209.993533 -1396.806966 
##           86           87           88           89           90 
##   623.193034  -686.908730   306.159113  1191.193034 -1128.993533 
##           91           92           93           94           95 
##   306.159113 -1128.993533  -686.908730   351.193034  -687.891769 
##           96           97           98           99          100 
##  5375.159113 -1128.993533  -767.959612  2069.108231  -218.840887 
##          101          102          103          104          105 
##   351.193034 -1130.959612 -1130.959612  -687.891769   831.159113 
##          106          107          108          109          110 
##  -687.891769   331.040388  -344.840887   351.193034  -687.891769 
##          111          112          113          114          115 
##  -211.959612  -687.891769  -211.959612  -344.840887  -688.874809 
##          116          117          118          119          120 
##  -803.806966  -767.959612  -688.874809 -1130.959612  -248.840887 
##          121          122          123          124          125 
##  -688.874809 -2681.467754 -2362.467754 -2362.467754 -2362.467754 
##          126          127          128          129          130 
## -2362.467754 -2362.467754 -2681.467754 -2681.467754 -1840.467754 
##          131          132          133          134          135 
## -1840.467754 -1840.467754 -1840.467754 -1105.467754 -2260.467754 
##          136          137          138          139          140 
## -1105.467754  -580.467754  -895.467754  -580.467754  -580.467754 
##          141          142          143          144          145 
##  -580.467754 -1789.467754 -2078.467754 -1422.467754  -855.467754 
##          146          147          148          149          150 
## -1422.467754 -1422.467754 -2260.467754  -360.467754 -2774.467754 
##          151          152          153          154          155 
##  -865.467754  -865.467754  -865.467754 -1364.467754  -366.467754 
##          156          157          158          159          160 
##  -366.467754 -1836.467754  -760.467754 -1836.467754 -1182.467754 
##          161          162          163          164          165 
##  2283.532246 -1312.467754  -823.467754  -823.467754  -823.467754 
##          166          167          168          169          170 
## -2204.467754 -1706.467754  -823.467754  -823.467754  -298.467754 
##          171          172          173          174          175 
##  -298.467754  -298.467754 -2786.467754 -2412.467754 -1658.467754 
##          176          177          178          179          180 
## -2362.467754 -2362.467754 -2199.467754 -2412.467754 -2786.467754 
##          181          182          183          184          185 
##  3509.007144 10573.007144 12594.007144 10573.007144 10573.007144 
##          186          187          188          189          190 
##  2546.007144  1660.007144  -823.992856 -1186.992856  -823.992856 
##          191          192          193          194          195 
##  -823.992856  -267.992856   782.007144  2966.007144  -267.992856 
##          196          197          198          199          200 
##  -267.992856 -1967.992856 -2704.077659 -1681.077659 -1967.992856 
##          201          202          203          204          205 
## -1681.077659 -1967.992856 -2111.077659 -1169.992856 -1182.077659 
##          206          207          208          209          210 
## -1967.992856 -1681.077659 -1681.077659 -1867.992856  -819.077659 
##          211          212          213          214          215 
##  1901.007144  3513.007144  1901.007144  2066.007144  1649.007144 
##          216          217          218          219          220 
##  2426.007144  -792.992856  2588.007144  4142.007144  4142.007144 
##          221          222          223          224          225 
##  3512.007144  3512.007144 -1426.077659  -819.077659 -1426.077659 
##          226          227          228          229          230 
## -1681.077659 -2111.077659  -263.077659  -712.992856 -1681.077659 
##          231          232          233          234          235 
##   258.007144 -2222.077659 -1169.992856   204.007144   204.007144 
##          236          237          238          239          240 
##   204.007144  -792.992856   204.007144  2063.007144  2777.007144 
##          241          242          243          244          245 
##  2063.007144   204.007144 -1165.077659 -1165.077659 -1165.077659 
##          246          247          248          249          250 
## -1165.077659   262.922341  -708.077659  -183.992856  -683.992856 
##          251          252          253          254          255 
##  1286.007144  3306.007144  -683.992856  2230.007144  -588.992856 
##          256          257          258          259          260 
##  -588.992856  -588.992856  1286.007144   682.007144   724.007144 
##          261          262          263          264          265 
##   -63.992856  -708.077659  -393.077659   146.007144  -708.077659 
##          266          267          268          269          270 
##  1653.922341  2036.007144  1653.922341  -393.077659   724.007144 
##          271          272          273          274          275 
##   -63.992856  -788.077659  -788.077659  -998.077659  -788.077659 
##          276          277          278          279          280 
##   208.922341  -473.077659 -1165.077659  -708.077659 -1863.077659 
##          281          282          283          284          285 
##  -106.077659   208.922341  -788.077659  -788.077659 -1418.077659 
##          286          287          288          289          290 
##  -998.077659  -998.077659 -1786.077659 -1418.077659 -2173.077659 
##          291          292          293          294          295 
## -1623.077659 -1333.077659 -2043.077659 -1203.077659 -1753.077659 
##          296          297          298          299          300 
##   728.922341  -584.077659 -2173.077659 -2359.077659  -584.077659 
##          301          302          303          304          305 
##   -59.077659  -584.077659  -584.077659  -584.077659 -1161.077659
# Model summary: coefficient estimates with standard errors, t values and
# p-values, plus residual standard error, R-squared and the F-statistic
summary(object = fit)
## 
## Call:
## lm(formula = yPrice ~ xAdvBookDays, data = price1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2786.5 -1320.8  -688.9   351.2 12594.0 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5422.959    224.497   24.16   <2e-16 ***
## xAdvBookDays   -0.983      6.154   -0.16    0.873    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2392 on 303 degrees of freedom
## Multiple R-squared:  8.422e-05,  Adjusted R-squared:  -0.003216 
## F-statistic: 0.02552 on 1 and 303 DF,  p-value: 0.8732