Apurva
30th Oct, 2017
# NOTE(review): setwd() ties the script to one machine's directory layout; it
# is kept so the relative file name below still resolves. Consider running the
# script from the project directory instead.
setwd("~/Downloads/IIM Lucknow/TERM 5/DAM/Hotels")
library(readr)  # NOTE(review): loaded, but the load below uses base read.csv()
# Read the hotel listings. stringsAsFactors = TRUE keeps text columns as
# factors (the read.csv default before R 4.0.0); the per-level counts in the
# summary below depend on that.
hotel <- read.csv("AllCitiesData.csv", stringsAsFactors = TRUE)
summary(hotel)
CityName Population CityRank IsTouristDestination
Jaipur : 765 Min. : 8096 Min. : 3.00 No :2168
Goa : 624 1st Qu.: 595575 1st Qu.: 9.00 Yes:4299
Kochi : 608 Median :1457723 Median :15.00
Pune : 600 Mean :1515470 Mean :16.03
Udaipur: 456 3rd Qu.:2490891 3rd Qu.:23.00
Agra : 432 Max. :4467797 Max. :37.00
(Other):2982
IsWeekend IsNewYearEve Date
No :2424 No :5658 Dec 25 2016: 810
Yes:4043 Yes: 809 Dec 28 2016: 810
Dec 18 2016: 809
Dec 21 2016: 809
Dec 24 2016: 809
Dec 31 2016: 809
(Other) :1611
HotelName RoomRent StarRating
Apnayt Villa : 16 Min. : 299 Min. :0.000
Fortune Hotel The South Park : 16 1st Qu.: 2482 1st Qu.:3.000
Golden Tulip Chandigarh Panchkula: 16 Median : 4000 Median :3.000
Goldfinch Hotel : 16 Mean : 6025 Mean :3.424
Holiday Inn Chandigarh Panchkula : 16 3rd Qu.: 6499 3rd Qu.:4.000
Hotel City Park : 16 Max. :75000 Max. :5.000
(Other) :6371
Airport
Min. : 0.20
1st Qu.: 6.20
Median :12.00
Mean :15.17
3rd Qu.:22.50
Max. :61.00
HotelAddress
14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta : 16
57/1,Tamil Sangam Road, Simmakkal, Madurai, 625001, India : 16
B-4/25, Shivala Ghat, Nepali Kothi, Varanasi, Uttar Pradesh: 16
Chowara Beach, Kovalam, Chowara, Kerala : 16
Circuit House Road,,Jodhpur,Rajasthan,India : 16
Fatehabad Road, Agra, Uttar Pradesh, 282001 India : 16
(Other) :6371
HotelPincode HotelDescription
Min. :134109 3-star Jaipur hotel with restaurant : 64
1st Qu.:282001 Abc : 64
Median :342802 Jaipur hotel next to shopping : 56
Mean :388606 3 lmn : 40
3rd Qu.:411057 3-star Jaisalmer hotel with restaurant: 40
Max. :781022 3-star hotel in Pune : 40
(Other) :6163
FreeWifi FreeBreakfast HotelCapacity HasSwimmingPool
No : 542 No :2504 Min. : 1.00 No :3867
Yes:5925 Yes:3963 1st Qu.: 15.00 Yes:2600
Median : 30.00
Mean : 49.55
3rd Qu.: 64.00
Max. :414.00
# Key each row by hotel name plus city, then count the distinct keys.
hotel$id <- with(hotel, paste(HotelName, CityName))
length(unique(hotel$id))
[1] 821
# Number of distinct cities in the data.
sum(!duplicated(hotel$CityName))
[1] 25
# Average room rent across all rows.
with(hotel, mean(RoomRent))
[1] 6024.896
# Median room rent across all rows.
with(hotel, median(RoomRent))
[1] 4000
# Rows for tourist destinations, then the number of distinct cities among them.
tourist <- subset(hotel, IsTouristDestination == "Yes")
with(tourist, length(unique(CityName)))
[1] 13
# Rows for non-tourist destinations, then the number of distinct cities among them.
nontourist <- subset(hotel, IsTouristDestination == "No")
with(nontourist, length(unique(CityName)))
[1] 12
# Count distinct hotels (by id) within each IsTouristDestination group.
mt <- setNames(
  aggregate(id ~ IsTouristDestination, data = hotel,
            FUN = function(ids) length(unique(ids))),
  c("Tourist Destination", "Count")
)
mt
Tourist Destination Count
1 No 292
2 Yes 529
# Mean room rent within each IsTouristDestination group.
mt1 <- aggregate(RoomRent ~ IsTouristDestination, data = hotel, FUN = mean)
names(mt1) <- c("Tourist Destination", "Average Rent")
mt1
Tourist Destination Average Rent
1 No 4507.500
2 Yes 6790.124
# gplots is loaded for the plotmeans() call below.
library(gplots)
# Plot the mean room rent for each IsTouristDestination group, with the means labelled.
plotmeans(hotel$RoomRent ~ hotel$IsTouristDestination, mean.labels = TRUE, xlab = "Tourist Destination", ylab = "Average Room Rent")
# Two-sample t-test of room rent between the two IsTouristDestination groups
# (the output below reports the Welch variant).
t.test(hotel$RoomRent ~ hotel$IsTouristDestination)
Welch Two Sample t-test
data: hotel$RoomRent by hotel$IsTouristDestination
t = -14.33, df = 6464.8, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-2594.885 -1970.361
sample estimates:
mean in group No mean in group Yes
4507.500 6790.124
# NOTE(review): this repeats the t-test above verbatim; its output is identical.
t.test(hotel$RoomRent ~ hotel$IsTouristDestination)
Welch Two Sample t-test
data: hotel$RoomRent by hotel$IsTouristDestination
t = -14.33, df = 6464.8, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-2594.885 -1970.361
sample estimates:
mean in group No mean in group Yes
4507.500 6790.124
# As the p-value < 0.05, we reject the null hypothesis that the average room rent is equal in tourist and non-tourist destinations.
# Distinct hotels (by id) in each tourist-destination x swimming-pool cell.
# A hotel whose HasSwimmingPool value differs between rows is counted in both
# columns, which is why the grand total below (823) exceeds the number of
# distinct ids (821).
mytable2 <- tapply(
  hotel$id,
  list(hotel$IsTouristDestination, hotel$HasSwimmingPool),
  FUN = function(ids) length(unique(ids))
)
dimnames(mytable2) <- list(c("TD-No", "TD-Yes"), c("SP-No", "SP-Yes"))
# Put the "has pool" column first.
mt2 <- mytable2[, c(2, 1), drop = FALSE]
addmargins(mt2)
SP-Yes SP-No Sum
TD-No 93 199 292
TD-Yes 231 300 531
Sum 324 499 823
# Share of hotels with / without a pool within each tourist-destination row.
prop.table(mt2, margin = 1)
SP-Yes SP-No
TD-No 0.3184932 0.6815068
TD-Yes 0.4350282 0.5649718
# Mean room rent for every tourist-destination x swimming-pool combination.
a <- aggregate(RoomRent ~ IsTouristDestination + HasSwimmingPool,
               data = hotel, FUN = mean)
names(a) <- c("TouristDest", "SwimmingPool", "Average Rent")
a
TouristDest SwimmingPool Average Rent
1 No No 3328.812
2 Yes No 3966.258
3 No Yes 6691.175
4 Yes Yes 10563.975
# Build the bar labels from the group columns themselves rather than a
# hard-coded vector, so they stay correct if the row order or the number of
# groups in `a` changes. For the table above this yields
# "NT-NoPool", "T-NoPool", "NT-Pool", "T-Pool".
a$lab <- paste0(
  ifelse(a$TouristDest == "Yes", "T", "NT"),
  "-",
  ifelse(a$SwimmingPool == "Yes", "Pool", "NoPool")
)
# One bar per group; the colour count follows the number of rows in `a`.
barplot(a$`Average Rent`, names.arg = a$lab, ylab = "Average Room Rent", col = rainbow(nrow(a)))
# Baseline model: simple linear regression of room rent on hotel capacity.
m0 <- lm(RoomRent ~ HotelCapacity, data = hotel)
summary(m0)
Call:
lm(formula = RoomRent ~ HotelCapacity, data = hotel)
Residuals:
Min 1Q Median 3Q Max
-8554 -3230 -2103 263 69933
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4824.668 125.213 38.53 <2e-16 ***
HotelCapacity 24.222 1.731 13.99 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7334 on 6465 degrees of freedom
Multiple R-squared: 0.02938, Adjusted R-squared: 0.02923
F-statistic: 195.7 on 1 and 6465 DF, p-value: < 2.2e-16
# Add the tourist-destination indicator and its interaction with capacity, so
# the capacity slope is allowed to differ between the two groups.
m2 <- lm(RoomRent ~ IsTouristDestination + HotelCapacity + IsTouristDestination:HotelCapacity, data = hotel)
summary(m2)
Call:
lm(formula = RoomRent ~ IsTouristDestination + HotelCapacity +
IsTouristDestination:HotelCapacity, data = hotel)
Residuals:
Min 1Q Median 3Q Max
-9176 -3249 -1623 428 69357
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3439.566 215.215 15.982 < 2e-16
IsTouristDestinationYes 1885.360 263.152 7.165 8.67e-13
HotelCapacity 18.903 2.638 7.165 8.67e-13
IsTouristDestinationYes:HotelCapacity 12.914 3.470 3.721 2e-04
(Intercept) ***
IsTouristDestinationYes ***
HotelCapacity ***
IsTouristDestinationYes:HotelCapacity ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7228 on 6463 degrees of freedom
Multiple R-squared: 0.05749, Adjusted R-squared: 0.05706
F-statistic: 131.4 on 3 and 6463 DF, p-value: < 2.2e-16
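# As the p-value of the interaction term is less than 0.05, we conclude that the interaction effect between Tourist Destination and Hotel Capacity is statistically significant. This differential impact is captured by the magnitude of B3 = 12.9: each additional unit of hotel capacity is associated with a room rent about 12.9 higher in tourist destinations than in non-tourist destinations.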