# Load the Cities42 hotel data set into hotel.df.
# NOTE(review): setwd() with a machine-specific absolute path makes the script
# non-portable. It is kept because code elsewhere may rely on the working
# directory; consider a project-relative path instead.
setwd("C:/Users/Dell/Downloads/Sameer Mathur")
hotel.df <- read.csv("Cities42.csv")
# View() opens a spreadsheet-style data viewer, which only makes sense when a
# person is driving the session, so skip it in batch runs.
if (interactive()) {
  View(hotel.df)
}
# Normalise every spelling of the date to the "Mon DD YYYY" form.
# Names are the regular expressions to look for; values are the replacements.
# The January day-month-year patterns accept an optional leading zero and are
# wrapped in word boundaries: the earlier unanchored "4-Jan-17" / "8-Jan-17"
# patterns also matched inside zero-padded values (e.g. "04-Jan-17"), which
# left the stray levels "0Jan 04 2017" and "0Jan 08 2017" (44 rows each).
# As before, January dates written with year 16 are mapped to 2017.
date.fixes <- c(
  "18-Dec-16" = "Dec 18 2016",
  "21-Dec-16" = "Dec 21 2016",
  "24-Dec-16" = "Dec 24 2016",
  "25-Dec-16" = "Dec 25 2016",
  "28-Dec-16" = "Dec 28 2016",
  "31-Dec-16" = "Dec 31 2016",
  "\\b0?4-Jan-1[67]\\b" = "Jan 04 2017",
  "\\b0?8-Jan-1[67]\\b" = "Jan 08 2017",
  "Jan 4 2017" = "Jan 04 2017",
  "Jan 8 2017" = "Jan 08 2017"
)
for (i in seq_along(date.fixes)) {
  hotel.df$Date <- gsub(names(date.fixes)[i], date.fixes[[i]], hotel.df$Date)
}
# Expected output after the fix: the two 44-row "0Jan ..." groups fold into
# the proper January levels (1608 + 44 and 1609 + 44). Re-run to confirm.
table(hotel.df$Date)
##
## Dec 18 2016 Dec 21 2016 Dec 24 2016 Dec 25 2016 Dec 28 2016
## 1652 1655 1655 1655 1655
## Dec 31 2016 Jan 04 2017 Jan 08 2017
## 1655 1652 1653
# Store the cleaned labels as a factor and check the resulting levels.
hotel.df$Date <- factor(hotel.df$Date)
is.factor(hotel.df$Date)
## [1] TRUE
levels(hotel.df$Date)
## [1] "Dec 18 2016" "Dec 21 2016" "Dec 24 2016" "Dec 25 2016"
## [5] "Dec 28 2016" "Dec 31 2016" "Jan 04 2017" "Jan 08 2017"
# Column-by-column overview of hotel.df (the recorded output follows).
summary(object = hotel.df)
## CityName Population CityRank IsMetroCity
## Delhi :2048 Min. : 8096 Min. : 0.00 Min. :0.0000
## Jaipur : 768 1st Qu.: 744983 1st Qu.: 2.00 1st Qu.:0.0000
## Mumbai : 712 Median : 3046163 Median : 9.00 Median :0.0000
## Bangalore: 656 Mean : 4416837 Mean :14.83 Mean :0.2842
## Goa : 624 3rd Qu.: 8443675 3rd Qu.:24.00 3rd Qu.:1.0000
## Kochi : 608 Max. :12442373 Max. :44.00 Max. :1.0000
## (Other) :7816
## IsTouristDestination IsWeekend IsNewYearEve Date
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Dec 21 2016:1655
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 Dec 24 2016:1655
## Median :1.0000 Median :1.0000 Median :0.0000 Dec 25 2016:1655
## Mean :0.6972 Mean :0.6228 Mean :0.1244 Dec 28 2016:1655
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 Dec 31 2016:1655
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Dec 18 2016:1652
## (Other) :3305
## HotelName RoomRent StarRating
## Vivanta by Taj : 32 Min. : 299 Min. :0.000
## Goldfinch Hotel : 24 1st Qu.: 2436 1st Qu.:3.000
## OYO Rooms : 24 Median : 4000 Median :3.000
## The Gordon House Hotel: 24 Mean : 5474 Mean :3.459
## Apnayt Villa : 16 3rd Qu.: 6299 3rd Qu.:4.000
## Bentleys Hotel Colaba : 16 Max. :322500 Max. :5.000
## (Other) :13096
## Airport
## Min. : 0.20
## 1st Qu.: 8.40
## Median : 15.00
## Mean : 21.16
## 3rd Qu.: 24.00
## Max. :124.00
##
## HotelAddress
## The Mall, Shimla : 32
## #2-91/14/8, White Fields, Kondapur, Hitech City, Hyderabad, 500084 India: 16
## 121, City Terrace, Walchand Hirachand Marg, Mumbai, Maharashtra : 16
## 14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta : 16
## 144/7, Rajiv Gandi Salai (OMR), Kottivakkam, Chennai, Tamil Nadu : 16
## 17, Oliver Road, Colaba, Mumbai, Maharashtra : 16
## (Other) :13120
## HotelPincode HotelDescription FreeWifi FreeBreakfast
## Min. : 100025 3 : 120 Min. :0.0000 Min. :0.0000
## 1st Qu.: 221001 Abc : 112 1st Qu.:1.0000 1st Qu.:0.0000
## Median : 395003 3-star hotel: 104 Median :1.0000 Median :1.0000
## Mean : 397430 3.5 : 88 Mean :0.9259 Mean :0.6491
## 3rd Qu.: 570001 4 : 72 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :7000157 (Other) :12728 Max. :1.0000 Max. :1.0000
## NA's : 8
## HotelCapacity HasSwimmingPool
## Min. : 0.00 Min. :0.0000
## 1st Qu.: 16.00 1st Qu.:0.0000
## Median : 34.00 Median :0.0000
## Mean : 62.51 Mean :0.3558
## 3rd Qu.: 75.00 3rd Qu.:1.0000
## Max. :600.00 Max. :1.0000
##
# Attach psych for describe(), then echo its descriptive statistics for
# hotel.df.
library(psych)
describe(x = hotel.df)
# Frequency tables for the 0/1 indicator columns and for the date column.
# Each table is stored under its own name and then echoed.
metro <- table(hotel.df[["IsMetroCity"]])
metro
##
## 0 1
## 9472 3760
tourist <- table(hotel.df[["IsTouristDestination"]])
tourist
##
## 0 1
## 4007 9225
weekend <- table(hotel.df[["IsWeekend"]])
weekend
##
## 0 1
## 4991 8241
newyear <- table(hotel.df[["IsNewYearEve"]])
newyear
##
## 0 1
## 11586 1646
date <- table(hotel.df[["Date"]])
date
##
## 0Jan 04 2017 0Jan 08 2017 Dec 18 2016 Dec 21 2016 Dec 24 2016
## 44 44 1652 1655 1655
## Dec 25 2016 Dec 28 2016 Dec 31 2016 Jan 04 2017 Jan 08 2017
## 1655 1655 1655 1608 1609
# Count how many rows carry each distinct star rating.
starrating <- table(hotel.df[["StarRating"]])
starrating
##
## 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4 4.1
## 16 8 440 632 5953 8 16 8 1752 8 24 16 32 2463 24
## 4.3 4.4 4.5 4.7 4.8 5
## 16 8 376 8 16 1408
# Count how many rows carry each distinct value of the Airport column.
airport <- table(hotel.df[["Airport"]])
airport
##
## 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 1.1 1.2 1.4
## 16 32 40 24 32 24 8 39 32 16 40 8
## 1.5 1.6 1.7 1.8 1.9 2 2.1 2.2 2.3 2.4 2.5 2.6
## 16 32 22 72 40 56 40 24 16 32 56 48
## 2.7 2.8 2.9 3 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8
## 56 24 56 56 16 24 16 48 56 64 16 40
## 3.9 4 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5
## 32 72 32 40 32 32 24 40 24 40 32 73
## 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6 6.1 6.2
## 72 72 32 40 48 40 32 56 40 33 32 64
## 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7 7.1 7.2 7.3 7.4
## 16 48 48 40 24 56 40 49 72 48 24 40
## 7.5 7.6 7.7 7.8 7.9 8 8.1 8.2 8.3 8.4 8.5 8.6
## 48 71 48 32 72 73 72 56 40 48 64 16
## 8.7 8.8 8.9 9 9.1 9.2 9.3 9.4 9.5 9.6 9.7 9.8
## 56 16 16 49 24 62 48 80 22 40 24 40
## 9.9 10 10.2 10.3 10.4 10.6 10.7 10.8 10.9 11 11.1 11.3
## 56 298 8 8 8 8 8 8 16 610 16 16
## 11.7 11.9 12 12.2 12.3 12.6 12.7 13 13.1 13.3 13.5 13.6
## 8 16 354 24 8 24 16 319 16 8 8 24
## 13.7 13.8 14 14.2 14.4 14.5 14.6 14.7 14.8 14.9 15 15.3
## 16 8 399 16 24 8 16 24 16 8 441 16
## 15.4 15.6 15.7 15.8 15.9 16 16.1 16.2 16.4 16.5 16.7 17
## 16 8 8 8 8 409 16 8 8 32 32 313
## 17.1 17.2 17.4 17.5 17.6 17.8 18 18.3 18.5 18.6 18.7 19
## 8 16 8 16 8 16 424 8 16 8 8 200
## 19.5 19.9 20 20.2 20.3 20.5 20.9 21 21.4 21.5 22 22.1
## 8 8 384 8 16 8 8 248 24 8 305 8
## 22.2 22.4 22.5 23 23.2 23.3 23.4 24 24.2 24.3 24.5 24.6
## 8 8 8 304 8 16 8 167 16 16 16 8
## 24.7 24.9 25 25.6 25.7 25.9 26 26.1 26.3 26.4 26.5 26.7
## 8 32 208 8 8 8 300 8 8 24 8 8
## 27 27.1 27.2 28 28.1 28.6 28.7 29 30 30.5 31 31.2
## 272 8 8 112 8 8 8 88 56 8 224 8
## 31.3 31.9 32 32.9 33 33.4 34 35 36 36.2 37 38
## 16 8 72 8 40 16 16 49 17 8 49 49
## 38.3 39 39.9 40 41 42 42.7 43 43.9 44 44.5 44.6
## 8 100 8 56 102 41 16 33 8 8 8 8
## 44.8 46 47 47.5 48 48.4 49 50 50.1 50.5 51 52
## 8 40 8 8 16 8 8 8 8 8 16 16
## 52.7 53 55 57.2 60 61 62 63 63.5 63.6 65 67.6
## 8 8 8 8 8 16 32 8 8 8 152 8
## 69 73.1 80 80.3 81 82 83 84 85 86 87 91.3
## 8 8 1 8 1 9 1 1 1 1 1 8
## 96.5 100 102.4 105 110 117.4 124
## 8 136 8 240 64 8 128
# Frequency tables for the free-wifi and free-breakfast indicators and for
# hotel capacity; each is stored under its own name and then echoed.
wifi <- table(hotel.df[["FreeWifi"]])
wifi
##
## 0 1
## 981 12251
breakfast <- table(hotel.df[["FreeBreakfast"]])
breakfast
##
## 0 1
## 4643 8589
capacity <- table(hotel.df[["HotelCapacity"]])
capacity
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 8 32 16 48 32 104 72 32 152 88 521 120 112 40 96
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
## 1782 192 112 168 169 296 176 192 128 289 240 144 160 176 86
## 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
## 296 136 264 104 120 112 168 56 120 144 256 96 144 73 128
## 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
## 144 72 24 160 96 104 40 120 24 72 72 48 40 40 48
## 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
## 128 40 24 40 56 88 40 80 48 48 72 30 72 32 16
## 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
## 32 40 48 40 24 88 32 40 32 24 64 8 32 48 8
## 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
## 40 8 48 24 32 16 16 16 48 32 78 16 40 16 64
## 106 107 108 109 110 111 112 113 114 115 117 119 120 121 122
## 48 24 48 16 24 16 64 16 24 32 48 24 16 8 16
## 124 126 127 128 129 130 132 133 134 135 136 137 138 139 140
## 8 16 24 24 40 31 56 8 8 16 8 32 8 8 40
## 141 142 144 145 147 148 149 150 151 153 154 155 159 160 162
## 32 24 16 24 16 8 24 48 24 15 24 16 8 24 8
## 164 165 166 167 170 171 172 173 176 177 178 179 180 181 182
## 24 8 8 16 16 8 24 16 8 16 32 16 8 24 8
## 183 184 187 189 190 191 195 196 197 198 199 200 201 202 203
## 16 40 8 8 7 8 8 8 8 16 8 24 8 32 8
## 204 205 207 208 209 210 211 212 214 215 216 218 220 222 223
## 16 8 8 24 8 8 24 8 24 8 8 32 24 8 8
## 228 229 230 231 232 233 234 235 236 237 240 244 247 248 251
## 8 8 8 8 8 8 8 8 8 8 8 24 8 8 8
## 254 255 260 261 264 267 269 270 273 279 281 286 287 292 293
## 8 8 8 8 16 1 8 8 8 8 8 8 16 8 8
## 302 310 311 317 323 324 326 327 334 340 341 385 390 393 400
## 8 8 8 6 8 8 8 24 8 8 8 8 8 8 8
## 403 411 414 419 436 451 461 480 493 507 523 550 560 600
## 8 16 8 8 8 8 8 8 16 8 16 8 16 8
# Count rows without (0) and with (1) a swimming pool.
pool <- table(hotel.df[["HasSwimmingPool"]])
pool
##
## 0 1
## 8524 4708
# Bar charts of the star-rating, capacity and swimming-pool frequency tables.
barplot(height = starrating)

barplot(height = capacity)

barplot(height = pool)

# Scatter plots with RoomRent on the x-axis against star rating, capacity and
# the swimming-pool indicator.
plot(x = hotel.df$RoomRent, y = hotel.df$StarRating)

plot(x = hotel.df$RoomRent, y = hotel.df$HotelCapacity)

plot(x = hotel.df$RoomRent, y = hotel.df$HasSwimmingPool)

# Correlogram of hotel.df: variables reordered (order = TRUE), shaded panels
# below the diagonal and pie panels above it.
library(corrgram)
corrgram(
  hotel.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Hotel Room Pricing factors"
)

# Welch two-sample t-test: mean room rent without vs. with a swimming pool.
t.test(formula = RoomRent ~ HasSwimmingPool, data = hotel.df)
##
## Welch Two Sample t-test
##
## data: RoomRent by HasSwimmingPool
## t = -29.013, df = 5011.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5096.030 -4450.942
## sample estimates:
## mean in group 0 mean in group 1
## 3775.566 8549.052
# Test the association between room rent and hotel capacity.
# The earlier two-sample t-test on these two columns compared the mean rent
# with the mean capacity (two different quantities), which says nothing about
# whether rent varies with capacity; a correlation test answers that question.
# The t-test output previously recorded here no longer applies; re-run to
# capture the cor.test() result.
cor.test(~ RoomRent + HotelCapacity, data = hotel.df)
# Welch two-sample t-test: mean room rent without vs. with free breakfast.
t.test(formula = RoomRent ~ FreeBreakfast, data = hotel.df)
##
## Welch Two Sample t-test
##
## data: RoomRent by FreeBreakfast
## t = 0.98095, df = 6212.3, p-value = 0.3267
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -153.5017 460.9935
## sample estimates:
## mean in group 0 mean in group 1
## 5573.790 5420.044
# Test the association between room rent and star rating.
# The earlier two-sample t-test on these two columns compared the mean rent
# with the mean star rating (two different quantities), which says nothing
# about whether rent varies with rating; a correlation test answers that.
# The t-test output previously recorded here no longer applies; re-run to
# capture the cor.test() result.
cor.test(~ RoomRent + StarRating, data = hotel.df)
# Welch two-sample t-test: mean room rent in non-metro vs. metro cities.
t.test(formula = RoomRent ~ IsMetroCity, data = hotel.df)
##
## Welch Two Sample t-test
##
## data: RoomRent by IsMetroCity
## t = 10.721, df = 13224, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 888.0308 1285.4102
## sample estimates:
## mean in group 0 mean in group 1
## 5782.794 4696.073