# Project Title: Analysis of Room Rent of Hotels in 42 Cities
# Name: Devesh Patidar
# Email: devesh.bilpank@gmail.com
# College: IIT Bombay
# Load the raw hotel listings from the working directory.
# stringsAsFactors = TRUE keeps text columns as factors: the summary()
# transcript below reports level counts (e.g. "Delhi :2048"), which only
# appear for factor columns, and read.csv() stopped converting strings to
# factors by default in R 4.0.0.
cities42 <- read.csv("Cities42.csv", stringsAsFactors = TRUE)
# View() opens a spreadsheet-style viewer; only do so in interactive sessions
# so the script can also run under Rscript / knitr.
if (interactive()) {
  View(cities42)
}
# Number of rows and columns.
dim(cities42)
## [1] 13232 20
# Per-column summary statistics.
summary(cities42)
## X CityName Population CityRank
## Min. : 1 Delhi :2048 Min. : 8096 Min. : 0.00
## 1st Qu.: 3309 Jaipur : 768 1st Qu.: 744983 1st Qu.: 2.00
## Median : 6616 Mumbai : 712 Median : 3046163 Median : 9.00
## Mean : 6616 Bangalore: 656 Mean : 4416837 Mean :14.83
## 3rd Qu.: 9924 Goa : 624 3rd Qu.: 8443675 3rd Qu.:24.00
## Max. :13232 Kochi : 608 Max. :12442373 Max. :44.00
## (Other) :7816
## IsMetroCity IsTouristDestination IsWeekend IsNewYearEve
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :1.0000 Median :0.0000
## Mean :0.2842 Mean :0.6972 Mean :0.6228 Mean :0.1244
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## Date HotelName RoomRent
## Dec 21 2016:1611 Vivanta by Taj : 32 Min. : 299
## Dec 24 2016:1611 Goldfinch Hotel : 24 1st Qu.: 2436
## Dec 25 2016:1611 OYO Rooms : 24 Median : 4000
## Dec 28 2016:1611 The Gordon House Hotel: 24 Mean : 5474
## Dec 31 2016:1611 Apnayt Villa : 16 3rd Qu.: 6299
## Dec 18 2016:1608 Bentleys Hotel Colaba : 16 Max. :322500
## (Other) :3569 (Other) :13096
## StarRating Airport
## Min. :0.000 Min. : 0.20
## 1st Qu.:3.000 1st Qu.: 8.40
## Median :3.000 Median : 15.00
## Mean :3.459 Mean : 21.16
## 3rd Qu.:4.000 3rd Qu.: 24.00
## Max. :5.000 Max. :124.00
##
## HotelAddress
## The Mall, Shimla : 32
## #2-91/14/8, White Fields, Kondapur, Hitech City, Hyderabad, 500084 India: 16
## 121, City Terrace, Walchand Hirachand Marg, Mumbai, Maharashtra : 16
## 14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta : 16
## 144/7, Rajiv Gandi Salai (OMR), Kottivakkam, Chennai, Tamil Nadu : 16
## 17, Oliver Road, Colaba, Mumbai, Maharashtra : 16
## (Other) :13120
## HotelPincode HotelDescription FreeWifi FreeBreakfast
## Min. : 100025 3 : 120 Min. :0.0000 Min. :0.0000
## 1st Qu.: 221001 Abc : 112 1st Qu.:1.0000 1st Qu.:0.0000
## Median : 395003 3-star hotel: 104 Median :1.0000 Median :1.0000
## Mean : 397430 3.5 : 88 Mean :0.9259 Mean :0.6491
## 3rd Qu.: 570001 4 : 72 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :7000157 (Other) :12728 Max. :1.0000 Max. :1.0000
## NA's : 8
## HotelCapacity HasSwimmingPool
## Min. : 0.00 Min. :0.0000
## 1st Qu.: 16.00 1st Qu.:0.0000
## Median : 34.00 Median :0.0000
## Mean : 62.51 Mean :0.3558
## 3rd Qu.: 75.00 3rd Qu.:1.0000
## Max. :600.00 Max. :1.0000
##
# Keep only listings whose reported capacity is non-zero (a zero capacity is
# treated as bad data).
hotel <- subset(cities42, HotelCapacity != 0) # Removing bad data
# Only open the data viewer in interactive sessions so the script can also
# run non-interactively.
if (interactive()) {
  View(hotel)
}
dim(hotel)
## [1] 13224 20
# NOTE: attach() places a snapshot of hotel's current columns on the search
# path so later statements can refer to them by bare name (RoomRent,
# CityName, ...). Columns added to `hotel` after this point are NOT visible
# by bare name and must be read as hotel$<column>.
attach(hotel)
summary(hotel)
## X CityName Population CityRank
## Min. : 1 Delhi :2048 Min. : 8096 Min. : 0.00
## 1st Qu.: 3307 Jaipur : 768 1st Qu.: 744983 1st Qu.: 2.00
## Median : 6612 Mumbai : 712 Median : 3046163 Median : 9.00
## Mean : 6614 Bangalore: 656 Mean : 4419207 Mean :14.83
## 3rd Qu.: 9918 Goa : 624 3rd Qu.: 8443675 3rd Qu.:24.00
## Max. :13232 Kochi : 608 Max. :12442373 Max. :44.00
## (Other) :7808
## IsMetroCity IsTouristDestination IsWeekend IsNewYearEve
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :1.0000 Median :0.0000
## Mean :0.2843 Mean :0.6976 Mean :0.6226 Mean :0.1244
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
## Date HotelName RoomRent
## Dec 21 2016:1610 Vivanta by Taj : 32 Min. : 299
## Dec 24 2016:1610 Goldfinch Hotel : 24 1st Qu.: 2439
## Dec 25 2016:1610 OYO Rooms : 24 Median : 4000
## Dec 28 2016:1610 The Gordon House Hotel: 24 Mean : 5476
## Dec 31 2016:1610 Apnayt Villa : 16 3rd Qu.: 6300
## Dec 18 2016:1607 Bentleys Hotel Colaba : 16 Max. :322500
## (Other) :3567 (Other) :13088
## StarRating Airport
## Min. :0.000 Min. : 0.20
## 1st Qu.:3.000 1st Qu.: 8.40
## Median :3.000 Median : 15.00
## Mean :3.459 Mean : 21.16
## 3rd Qu.:4.000 3rd Qu.: 24.00
## Max. :5.000 Max. :124.00
##
## HotelAddress
## The Mall, Shimla : 32
## #2-91/14/8, White Fields, Kondapur, Hitech City, Hyderabad, 500084 India: 16
## 121, City Terrace, Walchand Hirachand Marg, Mumbai, Maharashtra : 16
## 14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta : 16
## 144/7, Rajiv Gandi Salai (OMR), Kottivakkam, Chennai, Tamil Nadu : 16
## 17, Oliver Road, Colaba, Mumbai, Maharashtra : 16
## (Other) :13112
## HotelPincode HotelDescription FreeWifi FreeBreakfast
## Min. : 100025 3 : 120 Min. :0.0000 Min. :0.0000
## 1st Qu.: 221001 Abc : 112 1st Qu.:1.0000 1st Qu.:0.0000
## Median : 395003 3-star hotel: 104 Median :1.0000 Median :1.0000
## Mean : 397323 3.5 : 88 Mean :0.9258 Mean :0.6489
## 3rd Qu.: 562110 4 : 72 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :7000157 (Other) :12720 Max. :1.0000 Max. :1.0000
## NA's : 8
## HotelCapacity HasSwimmingPool
## Min. : 1.00 Min. :0.000
## 1st Qu.: 16.00 1st Qu.:0.000
## Median : 34.00 Median :0.000
## Mean : 62.55 Mean :0.356
## 3rd Qu.: 75.00 3rd Qu.:1.000
## Max. :600.00 Max. :1.000
##
# Bucket RoomRent into five ordinal bands with right-closed intervals:
#   1: <= 2500, 2: (2500, 5000], 3: (5000, 7500], 4: (7500, 10000], 5: > 10000
# findInterval(..., left.open = TRUE) counts how many break points lie strictly
# below each rent, so adding 1 yields the band number. The column is read from
# `hotel` directly instead of relying on the attached snapshot.
hotel$RentCategory <- findInterval(
  hotel$RoomRent,
  c(2500, 5000, 7500, 10000),
  left.open = TRUE
) + 1
# Only open the data viewer in interactive sessions.
if (interactive()) {
  View(hotel)
}
# Overall spread of rents on a log10 scale.
boxplot(log10(hotel$RoomRent))
# Number of listings per city.
with(hotel, table(CityName))
## CityName
## Agra Ahmedabad Amritsar Bangalore
## 432 424 136 656
## Bhubaneswar Chandigarh Chennai Darjeeling
## 120 336 416 136
## Delhi Gangtok Goa Guwahati
## 2048 128 624 48
## Haridwar Hyderabad Indore Jaipur
## 48 536 160 768
## Jaisalmer Jodhpur Kanpur Kochi
## 264 224 16 608
## Kolkata Lucknow Madurai Manali
## 512 128 112 288
## Mangalore Mumbai Munnar Mysore
## 96 712 328 160
## Nainital Ooty Panchkula Pune
## 144 136 64 600
## Puri Rajkot Rishikesh Shimla
## 56 128 88 280
## Srinagar Surat Thiruvanthipuram Thrissur
## 40 80 392 32
## Udaipur Varanasi
## 456 264
# Listings per Date value. The transcript below shows the dates recorded in
# more than one textual format (e.g. "18-Dec-16" next to "Dec 18 2016"), so
# one calendar day can be split across several rows of this table.
with(hotel, table(Date))
## Date
## 18-Dec-16 21-Dec-16 24-Dec-16 25-Dec-16 28-Dec-16 31-Dec-16
## 44 44 44 44 44 44
## 4-Jan-16 4-Jan-17 8-Jan-16 8-Jan-17 Dec 18 2016 Dec 21 2016
## 31 13 31 13 1607 1610
## Dec 24 2016 Dec 25 2016 Dec 28 2016 Dec 31 2016 Jan 04 2017 Jan 08 2017
## 1610 1610 1610 1610 1547 1541
## Jan 4 2017 Jan 8 2017
## 60 67
# Listings in non-tourist (0) versus tourist (1) destinations.
with(hotel, table(IsTouristDestination))
## IsTouristDestination
## 0 1
## 3999 9225
# car provides scatterplot(), used throughout the rest of the script.
library(car)
# One plot per device page.
par(mfrow = c(1, 1))
# Rent distribution per city. With horizontal = TRUE the value axis is the
# x-axis, and it shows log10(RoomRent), so it is labelled as such (matching
# the other rent boxplots in this script).
boxplot(
  log10(RoomRent) ~ CityName,
  data = hotel,
  main = "Room Rent vs. City Name",
  horizontal = TRUE,
  xlab = "log10(RoomRent)",
  ylab = "City Name"
)
# The printed vector recorded below is the value returned by this call.
scatterplot(CityName, log10(RoomRent))
## [1] "7241" "7242" "7243" "7244" "7245" "7246" "7247" "7248"
## [9] "7281" "7470" "7617" "7618" "7619" "7620" "7621" "7622"
## [17] "7623" "7624" "4402" "4407" "4408" "4777" "10090" "10093"
## [25] "9147" "9162" "9338" "9339" "9342" "11777" "11778" "11779"
## [33] "11780" "11781" "11747" "11748" "11750" "11763" "11764" "11766"
## [41] "11771" "11772" "11773" "11774" "2038" "2040" "2182" "2425"
## [49] "2430" "8247" "8310" "8333" "8335" "8363" "8365" "8366"
## [57] "4233" "4234" "4235" "4236" "4237" "4238" "4239" "4240"
## [65] "6152" "6533" "6534" "6535" "6536" "6537" "6538" "6539"
## [73] "6488" "6489" "6490" "9034" "9035" "9036" "9037" "9038"
## [81] "9039" "10686" "10693" "10694" "10699" "10701" "10702" "7686"
## [89] "27" "182" "590" "646" "12704" "12745" "12752" "11702"
## [97] "11921" "11927" "11928" "9662" "9661" "9663" "9755" "9757"
## [105] "9758" "9659" "9660" "9579" "9580" "6761" "6762" "6763"
## [113] "6757" "6758" "8657" "8658" "8660" "8663" "8664" "8769"
## [121] "8770" "8772" "8776" "8659"
# ---- Room rent vs. Population ----
boxplot(
  log10(RoomRent) ~ Population,
  data = hotel,
  main = "Room Rent vs. Population",
  horizontal = TRUE,
  xlab = "log10(RoomRent)",
  ylab = "Population"
)
scatterplot(Population, log10(RoomRent))
# NOTE(review): both arguments are raw numerics, so every distinct value is
# treated as its own category (df = 92579 in the transcript) and R warns that
# the chi-squared approximation may be incorrect. Consider testing against
# hotel$RentCategory instead.
chisq.test(Population, RoomRent) # p-value < 2.2e-16
## Warning in chisq.test(Population, RoomRent): Chi-squared approximation may
## be incorrect
##
## Pearson's Chi-squared test
##
## data: Population and RoomRent
## X-squared = 246000, df = 92579, p-value < 2.2e-16
# Test for a linear association between city population and rent. The earlier
# call, t.test(Population, RoomRent), compared the mean population with the
# mean rent, which says nothing about how the two are related.
cor.test(hotel$Population, hotel$RoomRent)
# TODO: re-run and record the output of cor.test(); the transcript of the
# superseded t.test(Population, RoomRent) call was removed.
# ---- Room rent vs. IsMetroCity (0/1 flag) ----
boxplot(
  log10(RoomRent) ~ IsMetroCity,
  data = hotel,
  main = "Room Rent vs. MetroCity",
  horizontal = TRUE,
  xlab = "log10(RoomRent)",
  ylab = "IsMetrocity"
)
scatterplot(IsMetroCity, log10(RoomRent))
# Mean rent within each level of the flag.
aggregate(RoomRent, by = list(IsMetroCity), mean)
## Group.1 x
## 1 0 5785.900
## 2 1 4696.073
# NOTE(review): RoomRent is passed as a raw numeric, so every distinct rent is
# its own category (df = 2153 in the transcript) and R warns that the
# approximation may be incorrect. Consider hotel$RentCategory instead.
chisq.test(IsMetroCity, RoomRent) # p-value < 2.2e-16
## Warning in chisq.test(IsMetroCity, RoomRent): Chi-squared approximation may
## be incorrect
##
## Pearson's Chi-squared test
##
## data: IsMetroCity and RoomRent
## X-squared = 6441.3, df = 2153, p-value < 2.2e-16
# Welch t-test of mean RoomRent between the two IsMetroCity groups. The
# earlier call, t.test(IsMetroCity, RoomRent), compared the mean of the 0/1
# flag itself with the mean rent; the formula form matches the FreeWifi and
# FreeBreakfast tests in this script.
t.test(RoomRent ~ IsMetroCity, data = hotel)
# TODO: re-run and record the output of the corrected t.test(); the transcript
# of the superseded two-sample call was removed.
# ---- Room rent vs. IsTouristDestination (0/1 flag) ----
boxplot(
  log10(RoomRent) ~ IsTouristDestination,
  data = hotel,
  main = "Room Rent vs. Tourist Destination",
  horizontal = TRUE,
  xlab = "log10(RoomRent)",
  ylab = "IsTouristDestination"
)
scatterplot(IsTouristDestination, log10(RoomRent))
# Mean rent within each level of the flag.
aggregate(RoomRent, by = list(IsTouristDestination), mean)
## Group.1 x
## 1 0 4115.009
## 2 1 6066.024
# NOTE(review): RoomRent is passed as a raw numeric, so every distinct rent is
# its own category (df = 2153 in the transcript) and R warns that the
# approximation may be incorrect. Consider hotel$RentCategory instead.
chisq.test(IsTouristDestination, RoomRent) # p-value < 2.2e-16
## Warning in chisq.test(IsTouristDestination, RoomRent): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: IsTouristDestination and RoomRent
## X-squared = 6402.6, df = 2153, p-value < 2.2e-16
# Welch t-test of mean RoomRent between tourist and non-tourist destinations.
# The earlier call, t.test(IsTouristDestination, RoomRent), compared the mean
# of the 0/1 flag itself with the mean rent; the formula form matches the
# FreeWifi and FreeBreakfast tests in this script.
t.test(RoomRent ~ IsTouristDestination, data = hotel)
# TODO: re-run and record the output of the corrected t.test(); the transcript
# of the superseded two-sample call was removed.
# ---- Room rent vs. IsWeekend (0/1 flag) ----
boxplot(
  log10(RoomRent) ~ IsWeekend,
  data = hotel,
  main = "Room Rent vs. Weekend",
  horizontal = TRUE,
  xlab = "log10(RoomRent)",
  ylab = "IsWeekend"
)
scatterplot(IsWeekend, log10(RoomRent))
# Mean rent within each level of the flag.
aggregate(RoomRent, by = list(IsWeekend), mean)
## Group.1 x
## 1 0 5430.835
## 2 1 5503.424
# NOTE(review): RoomRent is passed as a raw numeric, so every distinct rent is
# its own category (df = 2153 in the transcript) and R warns that the
# approximation may be incorrect. Consider hotel$RentCategory instead.
chisq.test(IsWeekend, RoomRent) # p-value = 1
## Warning in chisq.test(IsWeekend, RoomRent): Chi-squared approximation may
## be incorrect
##
## Pearson's Chi-squared test
##
## data: IsWeekend and RoomRent
## X-squared = 1583.5, df = 2153, p-value = 1
# Welch t-test of mean RoomRent between weekend and non-weekend listings. The
# earlier call, t.test(IsWeekend, RoomRent), compared the mean of the 0/1 flag
# itself with the mean rent; the formula form matches the FreeWifi and
# FreeBreakfast tests in this script.
t.test(RoomRent ~ IsWeekend, data = hotel)
# TODO: re-run and record the output of the corrected t.test(); the transcript
# of the superseded two-sample call was removed.
# ---- Room rent vs. IsNewYearEve (0/1 flag) ----
boxplot(
  log10(RoomRent) ~ IsNewYearEve,
  data = hotel,
  main = "Room Rent vs. NewYearEve",
  horizontal = TRUE,
  xlab = "log10(RoomRent)",
  ylab = "IsNewYearEve"
)
scatterplot(IsNewYearEve, log10(RoomRent))
# Mean rent within each level of the flag.
aggregate(RoomRent, by = list(IsNewYearEve), mean)
## Group.1 x
## 1 0 5369.594
## 2 1 6225.203
# NOTE(review): RoomRent is passed as a raw numeric, so every distinct rent is
# its own category (df = 2153 in the transcript) and R warns that the
# approximation may be incorrect. Consider hotel$RentCategory instead.
chisq.test(IsNewYearEve, RoomRent) # p-value = 0.9474
## Warning in chisq.test(IsNewYearEve, RoomRent): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: IsNewYearEve and RoomRent
## X-squared = 2047.8, df = 2153, p-value = 0.9474
# Welch t-test of mean RoomRent between New Year's Eve and other listings. The
# earlier call, t.test(IsNewYearEve, RoomRent), compared the mean of the 0/1
# flag itself with the mean rent; the formula form matches the FreeWifi and
# FreeBreakfast tests in this script.
t.test(RoomRent ~ IsNewYearEve, data = hotel)
# TODO: re-run and record the output of the corrected t.test(); the transcript
# of the superseded two-sample call was removed.
# ---- Room rent vs. StarRating: plots and per-rating mean rent ----
boxplot(
  log10(RoomRent) ~ StarRating,
  data = hotel,
  horizontal = TRUE,
  main = "Room Rent vs. StarRating",
  xlab = "log10(RoomRent)",
  ylab = "StarRating"
)
with(hotel, scatterplot(StarRating, log10(RoomRent)))
# One row per distinct StarRating value, with the mean RoomRent in column x.
with(hotel, aggregate(RoomRent, by = list(StarRating), FUN = mean))
## Group.1 x
## 1 0.0 7237.125
## 2 1.0 686.625
## 3 2.0 2783.166
## 4 2.5 2520.816
## 5 3.0 3696.945
## 6 3.2 15937.500
## 7 3.3 2841.062
## 8 3.4 23437.500
## 9 3.5 4843.346
## 10 3.6 7769.500
## 11 3.7 6701.958
## 12 3.8 5400.062
## 13 3.9 13062.750
## 14 4.0 6393.105
## 15 4.1 19075.000
## 16 4.3 7423.125
## 17 4.4 5563.500
## 18 4.5 8699.920
## 19 4.7 10125.000
## 20 4.8 46752.812
## 21 5.0 12398.221
# NOTE(review): both arguments are raw numerics, so every distinct value is
# treated as its own category (df = 43060 in the transcript) and R warns that
# the chi-squared approximation may be incorrect. Consider testing against
# hotel$RentCategory instead.
chisq.test(StarRating, RoomRent) # p-value < 2.2e-16
## Warning in chisq.test(StarRating, RoomRent): Chi-squared approximation may
## be incorrect
##
## Pearson's Chi-squared test
##
## data: StarRating and RoomRent
## X-squared = 132310, df = 43060, p-value < 2.2e-16
# Test for a linear association between star rating and rent. The earlier
# call, t.test(StarRating, RoomRent), compared the mean rating with the mean
# rent, which says nothing about how the two are related.
cor.test(hotel$StarRating, hotel$RoomRent)
# TODO: re-run and record the output of cor.test(); the transcript of the
# superseded t.test(StarRating, RoomRent) call was removed.
# ---- Room rent vs. Airport: plots and per-value mean rent ----
boxplot(
  log10(RoomRent) ~ Airport,
  data = hotel,
  horizontal = TRUE,
  main = "Room Rent vs. Airport",
  xlab = "log10(RoomRent)",
  ylab = "Distance of Airport"
)
with(hotel, scatterplot(Airport, log10(RoomRent)))
# One row per distinct Airport value, with the mean RoomRent in column x.
with(hotel, aggregate(RoomRent, by = list(Airport), FUN = mean))
## Group.1 x
## 1 0.2 5247.000
## 2 0.3 2217.500
## 3 0.4 4987.025
## 4 0.5 2437.458
## 5 0.6 4805.781
## 6 0.7 2894.750
## 7 0.8 4687.500
## 8 0.9 4356.128
## 9 1.0 4452.812
## 10 1.1 43487.500
## 11 1.2 5154.050
## 12 1.4 15562.500
## 13 1.5 7273.250
## 14 1.6 10460.469
## 15 1.7 4968.091
## 16 1.8 3298.681
## 17 1.9 6440.400
## 18 2.0 5804.821
## 19 2.1 3400.975
## 20 2.2 4448.000
## 21 2.3 8030.938
## 22 2.4 5419.656
## 23 2.5 5863.000
## 24 2.6 3027.396
## 25 2.7 4659.054
## 26 2.8 4563.417
## 27 2.9 3667.232
## 28 3.0 4636.929
## 29 3.1 5859.875
## 30 3.2 7848.667
## 31 3.3 8960.125
## 32 3.4 6088.854
## 33 3.5 4267.839
## 34 3.6 6699.828
## 35 3.7 2603.125
## 36 3.8 3145.825
## 37 3.9 4075.250
## 38 4.0 4013.944
## 39 4.1 4218.688
## 40 4.2 3857.075
## 41 4.3 4388.656
## 42 4.4 5525.062
## 43 4.5 6135.208
## 44 4.6 5061.025
## 45 4.7 2378.125
## 46 4.8 3889.250
## 47 4.9 3421.219
## 48 5.0 6144.041
## 49 5.1 6547.000
## 50 5.2 4390.306
## 51 5.3 4119.250
## 52 5.4 4705.900
## 53 5.5 4371.625
## 54 5.6 5430.900
## 55 5.7 6046.594
## 56 5.8 5625.607
## 57 5.9 5016.650
## 58 6.0 3875.970
## 59 6.1 5853.375
## 60 6.2 2701.828
## 61 6.3 1705.125
## 62 6.4 3212.479
## 63 6.5 4871.208
## 64 6.6 3623.625
## 65 6.7 2994.292
## 66 6.8 4132.357
## 67 6.9 2749.950
## 68 7.0 4028.469
## 69 7.1 2366.806
## 70 7.2 3611.604
## 71 7.3 2522.042
## 72 7.4 3571.975
## 73 7.5 4816.667
## 74 7.6 4420.141
## 75 7.7 27828.708
## 76 7.8 5656.594
## 77 7.9 2730.958
## 78 8.0 9879.685
## 79 8.1 5120.639
## 80 8.2 3684.607
## 81 8.3 5117.825
## 82 8.4 4164.208
## 83 8.5 2991.688
## 84 8.6 1378.938
## 85 8.7 4457.446
## 86 8.8 3498.562
## 87 8.9 2627.250
## 88 9.0 3618.694
## 89 9.1 5122.458
## 90 9.2 9520.790
## 91 9.3 4147.375
## 92 9.4 5233.500
## 93 9.5 8224.909
## 94 9.6 3226.050
## 95 9.7 4376.083
## 96 9.8 4045.625
## 97 9.9 7947.732
## 98 10.0 5184.302
## 99 10.2 2780.000
## 100 10.3 2587.000
## 101 10.4 2346.000
## 102 10.6 1574.375
## 103 10.7 7025.000
## 104 10.8 12157.875
## 105 10.9 1949.812
## 106 11.0 4699.239
## 107 11.1 2867.250
## 108 11.3 1948.812
## 109 11.7 4069.000
## 110 11.9 7264.938
## 111 12.0 5014.164
## 112 12.2 3113.458
## 113 12.3 1746.750
## 114 12.6 4241.000
## 115 12.7 4566.750
## 116 13.0 6872.332
## 117 13.1 2525.000
## 118 13.3 4881.250
## 119 13.5 1831.250
## 120 13.6 4371.333
## 121 13.7 5908.812
## 122 13.8 2507.500
## 123 14.0 3632.243
## 124 14.2 1801.000
## 125 14.4 4002.167
## 126 14.5 3847.500
## 127 14.6 6711.438
## 128 14.7 5431.167
## 129 14.8 7086.625
## 130 14.9 4631.250
## 131 15.0 4804.245
## 132 15.3 2983.875
## 133 15.4 5179.125
## 134 15.6 4233.375
## 135 15.7 3385.250
## 136 15.8 5960.500
## 137 15.9 9961.875
## 138 16.0 5052.724
## 139 16.1 10451.000
## 140 16.2 4637.250
## 141 16.4 2404.250
## 142 16.5 4639.250
## 143 16.7 6648.281
## 144 17.0 5245.613
## 145 17.1 3251.000
## 146 17.2 4874.500
## 147 17.4 1911.750
## 148 17.5 16538.125
## 149 17.6 6273.000
## 150 17.8 4139.438
## 151 18.0 5023.542
## 152 18.3 6125.000
## 153 18.5 3543.250
## 154 18.6 6693.750
## 155 18.7 2782.625
## 156 19.0 10216.920
## 157 19.5 2262.500
## 158 19.9 7232.500
## 159 20.0 5474.096
## 160 20.2 8412.500
## 161 20.3 3930.812
## 162 20.5 2169.625
## 163 20.9 6281.750
## 164 21.0 4546.419
## 165 21.4 6944.500
## 166 21.5 3882.750
## 167 22.0 4453.590
## 168 22.1 5305.000
## 169 22.2 3235.000
## 170 22.4 3887.500
## 171 22.5 6103.250
## 172 23.0 5019.740
## 173 23.2 10887.500
## 174 23.3 5088.000
## 175 23.4 4942.375
## 176 24.0 3863.335
## 177 24.2 38115.625
## 178 24.3 16894.500
## 179 24.5 5305.750
## 180 24.6 45274.375
## 181 24.7 2078.000
## 182 24.9 20867.438
## 183 25.0 5229.457
## 184 25.6 7140.625
## 185 25.7 6137.500
## 186 25.9 15937.500
## 187 26.0 6258.703
## 188 26.1 26156.250
## 189 26.3 2369.250
## 190 26.4 7483.000
## 191 26.5 6112.500
## 192 26.7 7992.500
## 193 27.0 5835.206
## 194 27.1 23437.500
## 195 27.2 4832.000
## 196 28.0 3282.277
## 197 28.1 7140.625
## 198 28.6 7518.750
## 199 28.7 3781.625
## 200 29.0 3602.364
## 201 30.0 5784.393
## 202 30.5 20500.000
## 203 31.0 4943.406
## 204 31.2 6193.750
## 205 31.3 9125.000
## 206 31.9 4204.750
## 207 32.0 5803.528
## 208 32.9 7936.875
## 209 33.0 3026.100
## 210 33.4 6292.000
## 211 34.0 5784.875
## 212 35.0 8111.898
## 213 36.0 7528.882
## 214 36.2 6871.500
## 215 37.0 8712.878
## 216 38.0 6006.755
## 217 38.3 8117.875
## 218 39.0 4524.650
## 219 39.9 2206.500
## 220 40.0 5576.768
## 221 41.0 5355.676
## 222 42.0 3292.293
## 223 42.7 4118.750
## 224 43.0 7559.758
## 225 43.9 9247.500
## 226 44.0 5925.000
## 227 44.5 4233.125
## 228 44.6 7147.000
## 229 44.8 33033.500
## 230 46.0 4236.850
## 231 47.0 7256.000
## 232 47.5 19108.125
## 233 48.0 4268.750
## 234 48.4 3000.000
## 235 49.0 18237.500
## 236 50.0 5681.875
## 237 50.1 2360.875
## 238 50.5 3417.750
## 239 51.0 3178.250
## 240 52.0 4198.375
## 241 52.7 7820.000
## 242 53.0 4062.500
## 243 55.0 18950.000
## 244 57.2 15375.000
## 245 60.0 2846.000
## 246 61.0 14319.062
## 247 62.0 5412.719
## 248 63.0 8687.500
## 249 63.5 3900.000
## 250 63.6 2625.000
## 251 65.0 6257.888
## 252 67.6 4149.750
## 253 69.0 2682.125
## 254 73.1 3172.500
## 255 80.0 2554.000
## 256 80.3 1117.750
## 257 81.0 2554.000
## 258 82.0 6717.111
## 259 83.0 2554.000
## 260 84.0 2554.000
## 261 85.0 2554.000
## 262 86.0 2554.000
## 263 87.0 2554.000
## 264 91.3 1758.875
## 265 96.5 3821.375
## 266 100.0 6144.257
## 267 102.4 6444.750
## 268 105.0 8162.371
## 269 110.0 5976.109
## 270 117.4 6337.375
## 271 124.0 4629.648
# NOTE(review): both arguments are raw numerics, so every distinct value is
# treated as its own category (df = 581310 in the transcript) and R warns that
# the chi-squared approximation may be incorrect. Consider testing against
# hotel$RentCategory instead.
chisq.test(Airport, RoomRent) # p-value < 2.2e-16
## Warning in chisq.test(Airport, RoomRent): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: Airport and RoomRent
## X-squared = 1583700, df = 581310, p-value < 2.2e-16
# Test for a linear association between the Airport value and rent. The
# earlier call, t.test(Airport, RoomRent), compared the mean Airport value
# with the mean rent, which says nothing about how the two are related.
cor.test(hotel$Airport, hotel$RoomRent)
# TODO: re-run and record the output of cor.test(); the transcript of the
# superseded t.test(Airport, RoomRent) call was removed.
# ---- Room rent vs. FreeWifi (0/1 flag) ----
boxplot(
  log10(RoomRent) ~ FreeWifi,
  data = hotel,
  horizontal = TRUE,
  main = "Room Rent vs. FreeWifi",
  xlab = "log10(RoomRent)",
  ylab = "FreeWifi"
)
with(hotel, scatterplot(FreeWifi, log10(RoomRent)))
# Mean rent for listings without (0) and with (1) free wifi.
with(hotel, aggregate(RoomRent, by = list(FreeWifi), FUN = mean))
## Group.1 x
## 1 0 5380.004
## 2 1 5483.722
# Chi-squared test of FreeWifi against the raw rent values; the recorded
# p-value is < 2.2e-16, with a warning about the approximation.
with(hotel, chisq.test(FreeWifi, RoomRent))
## Warning in chisq.test(FreeWifi, RoomRent): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: FreeWifi and RoomRent
## X-squared = 5910.4, df = 2153, p-value < 2.2e-16
# Welch t-test of mean rent between the two FreeWifi groups; the recorded
# p-value is 0.4325.
t.test(RoomRent ~ FreeWifi, data = hotel)
##
## Welch Two Sample t-test
##
## data: RoomRent by FreeWifi
## t = -0.78503, df = 1805.8, p-value = 0.4325
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -362.8424 155.4066
## sample estimates:
## mean in group 0 mean in group 1
## 5380.004 5483.722
# ---- Room rent vs. FreeBreakfast (0/1 flag) ----
boxplot(
  log10(RoomRent) ~ FreeBreakfast,
  data = hotel,
  horizontal = TRUE,
  main = "Room Rent vs Freebreakfast",
  xlab = "log10(RoomRent)",
  ylab = "FreeBreakfast"
)
with(hotel, scatterplot(FreeBreakfast, log10(RoomRent)))
# Mean rent for listings without (0) and with (1) free breakfast.
with(hotel, aggregate(RoomRent, by = list(FreeBreakfast), FUN = mean))
## Group.1 x
## 1 0 5573.790
## 2 1 5423.131
# Chi-squared test of FreeBreakfast against the raw rent values; the recorded
# p-value is < 2.2e-16, with a warning about the approximation.
with(hotel, chisq.test(FreeBreakfast, RoomRent))
## Warning in chisq.test(FreeBreakfast, RoomRent): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: FreeBreakfast and RoomRent
## X-squared = 6488.9, df = 2153, p-value < 2.2e-16
# Welch t-test of mean rent between the two FreeBreakfast groups; the recorded
# p-value is 0.3365.
t.test(RoomRent ~ FreeBreakfast, data = hotel)
##
## Welch Two Sample t-test
##
## data: RoomRent by FreeBreakfast
## t = 0.96115, df = 6214.6, p-value = 0.3365
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -156.6221 457.9393
## sample estimates:
## mean in group 0 mean in group 1
## 5573.790 5423.131
# ---- Room rent vs. HotelCapacity: plots and per-capacity mean rent ----
boxplot(
  log10(RoomRent) ~ HotelCapacity,
  data = hotel,
  horizontal = TRUE,
  main = "Room Rent vs Hotel Capacity",
  xlab = "log10(RoomRent)",
  ylab = "HotelCapacity"
)
with(hotel, scatterplot(HotelCapacity, log10(RoomRent)))
# One row per distinct HotelCapacity value, with the mean RoomRent in column x.
with(hotel, aggregate(RoomRent, by = list(HotelCapacity), FUN = mean))
## Group.1 x
## 1 1 5613.062
## 2 2 9757.688
## 3 3 4295.188
## 4 4 3174.438
## 5 5 3216.144
## 6 6 3956.319
## 7 7 2398.719
## 8 8 3799.724
## 9 9 6148.659
## 10 10 5846.000
## 11 11 4153.733
## 12 12 4853.848
## 13 13 4817.900
## 14 14 7621.406
## 15 15 3714.794
## 16 16 6343.458
## 17 17 5198.688
## 18 18 3020.744
## 19 19 5436.503
## 20 20 3363.311
## 21 21 3169.528
## 22 22 3575.667
## 23 23 3714.547
## 24 24 4055.228
## 25 25 2966.417
## 26 26 3476.444
## 27 27 4289.575
## 28 28 4625.722
## 29 29 7759.860
## 30 30 4561.662
## 31 31 6490.971
## 32 32 4173.807
## 33 33 5002.913
## 34 34 4478.892
## 35 35 3762.562
## 36 36 5194.946
## 37 37 3272.054
## 38 38 3212.292
## 39 39 6695.972
## 40 40 4575.496
## 41 41 4880.479
## 42 42 4291.326
## 43 43 5459.178
## 44 44 3930.445
## 45 45 3882.049
## 46 46 4824.778
## 47 47 4653.042
## 48 48 4245.281
## 49 49 4665.823
## 50 50 6566.019
## 51 51 4099.300
## 52 52 4174.858
## 53 53 3646.833
## 54 54 5537.889
## 55 55 8833.153
## 56 56 3885.562
## 57 57 4895.875
## 58 58 5659.500
## 59 59 10682.646
## 60 60 6806.430
## 61 61 6117.575
## 62 62 6533.833
## 63 63 5170.375
## 64 64 15013.107
## 65 65 9644.000
## 66 66 5683.075
## 67 67 5756.163
## 68 68 6253.438
## 69 69 4759.646
## 70 70 8099.875
## 71 71 15643.000
## 72 72 7467.181
## 73 73 5314.969
## 74 74 2347.562
## 75 75 9944.344
## 76 76 3903.875
## 77 77 6125.896
## 78 78 10787.500
## 79 79 48024.583
## 80 80 8503.034
## 81 81 4579.938
## 82 82 4553.150
## 83 83 17432.562
## 84 84 12533.333
## 85 85 9676.047
## 86 86 4999.000
## 87 87 17063.625
## 88 88 8249.271
## 89 89 3734.375
## 90 90 7072.500
## 91 91 2812.125
## 92 92 5826.229
## 93 93 5771.167
## 94 94 6662.812
## 95 95 5801.938
## 96 96 10487.312
## 97 97 5798.750
## 98 98 7016.958
## 99 99 4659.125
## 100 100 5841.987
## 101 101 2845.875
## 102 102 14709.075
## 103 103 5898.500
## 104 104 6792.375
## 105 106 6668.167
## 106 107 5448.750
## 107 108 5265.333
## 108 109 3445.875
## 109 110 7293.042
## 110 111 4630.625
## 111 112 6043.484
## 112 113 4179.000
## 113 114 5743.417
## 114 115 4081.375
## 115 117 6775.312
## 116 119 5657.958
## 117 120 6593.750
## 118 121 3015.500
## 119 122 5936.562
## 120 124 5790.000
## 121 126 8693.812
## 122 127 4864.250
## 123 128 5245.500
## 124 129 5742.325
## 125 130 7160.677
## 126 132 7014.857
## 127 133 4765.625
## 128 134 6598.750
## 129 135 6650.062
## 130 136 6250.000
## 131 137 8918.938
## 132 138 5580.000
## 133 139 4432.125
## 134 140 7414.000
## 135 141 8150.938
## 136 142 5625.000
## 137 144 6138.438
## 138 145 12424.375
## 139 147 5384.500
## 140 148 4499.000
## 141 149 6031.333
## 142 150 5850.792
## 143 151 4931.083
## 144 153 6442.933
## 145 154 10755.417
## 146 155 4459.688
## 147 159 2384.250
## 148 160 9771.667
## 149 162 4500.000
## 150 164 8049.833
## 151 165 3993.625
## 152 166 5681.250
## 153 167 6647.500
## 154 170 11875.000
## 155 171 4403.375
## 156 172 6347.375
## 157 173 4448.688
## 158 176 5950.000
## 159 177 7280.250
## 160 178 5398.375
## 161 179 11840.938
## 162 180 4574.000
## 163 181 4984.458
## 164 182 5875.000
## 165 183 18552.250
## 166 184 6393.125
## 167 187 5262.500
## 168 189 7275.000
## 169 190 3621.571
## 170 191 4931.625
## 171 195 6783.750
## 172 196 3659.000
## 173 197 6693.750
## 174 198 5437.625
## 175 199 7018.750
## 176 200 8482.792
## 177 201 8281.250
## 178 202 13359.375
## 179 203 4900.000
## 180 204 7338.250
## 181 205 2045.875
## 182 207 10330.875
## 183 208 7666.417
## 184 209 4900.000
## 185 210 5625.000
## 186 211 7140.042
## 187 212 10375.000
## 188 214 4626.333
## 189 215 8186.750
## 190 216 7320.000
## 191 218 6683.250
## 192 220 6828.250
## 193 222 6187.500
## 194 223 5432.500
## 195 228 4837.500
## 196 229 10887.500
## 197 230 4886.500
## 198 231 9127.750
## 199 232 3300.000
## 200 233 9968.750
## 201 234 5656.875
## 202 235 14850.000
## 203 236 5875.000
## 204 237 6375.000
## 205 240 15937.500
## 206 244 6526.292
## 207 247 6416.625
## 208 248 6493.750
## 209 251 10125.000
## 210 254 17025.625
## 211 255 18237.500
## 212 260 8823.750
## 213 261 8406.250
## 214 264 7397.938
## 215 267 4776.000
## 216 269 7065.250
## 217 270 8042.875
## 218 273 9375.000
## 219 279 9874.375
## 220 281 11187.500
## 221 286 6709.500
## 222 287 10864.188
## 223 292 10184.375
## 224 293 6523.875
## 225 302 11187.500
## 226 310 6052.125
## 227 311 4249.000
## 228 317 8716.667
## 229 323 9750.000
## 230 324 10812.500
## 231 326 8887.500
## 232 327 6508.333
## 233 334 7068.125
## 234 340 6125.000
## 235 341 9280.375
## 236 385 6861.500
## 237 390 9031.125
## 238 393 4581.875
## 239 400 6298.750
## 240 403 12157.875
## 241 411 8775.000
## 242 414 7165.250
## 243 419 5500.000
## 244 436 8993.750
## 245 451 6300.000
## 246 461 7429.125
## 247 480 8061.500
## 248 493 13348.250
## 249 507 8562.500
## 250 523 9543.750
## 251 550 6741.500
## 252 560 14317.188
## 253 600 6928.750
# NOTE(review): both arguments are raw numerics, so every distinct value is
# treated as its own category (df = 542560 in the transcript) and R warns that
# the chi-squared approximation may be incorrect. Consider testing against
# hotel$RentCategory instead.
chisq.test(HotelCapacity, RoomRent) # p-value < 2.2e-16
## Warning in chisq.test(HotelCapacity, RoomRent): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: HotelCapacity and RoomRent
## X-squared = 1468500, df = 542560, p-value < 2.2e-16
# Test for a linear association between hotel capacity and rent. The earlier
# call, t.test(RoomRent, HotelCapacity), compared the mean rent with the mean
# capacity, which says nothing about how the two are related.
cor.test(hotel$HotelCapacity, hotel$RoomRent)
# TODO: re-run and record the output of cor.test(); the transcript of the
# superseded t.test(RoomRent, HotelCapacity) call was removed.
# ---- Room rent vs. HasSwimmingPool (0/1 flag) ----
boxplot(
  log10(RoomRent) ~ HasSwimmingPool,
  data = hotel,
  main = "Room Rent vs HasSwimmingPool",
  horizontal = TRUE,
  xlab = "log10(RoomRent)",
  ylab = "HasSwimmingPool" # axis label previously misspelled "HasSwimmimgPool"
)
scatterplot(HasSwimmingPool, log10(RoomRent))
# Mean rent for listings without (0) and with (1) a swimming pool.
aggregate(RoomRent, by = list(HasSwimmingPool), mean)
## Group.1 x
## 1 0 3777.132
## 2 1 8549.052
# NOTE(review): RoomRent is passed as a raw numeric, so every distinct rent is
# its own category (df = 2153 in the transcript) and R warns that the
# approximation may be incorrect. Consider hotel$RentCategory instead.
chisq.test(HasSwimmingPool, RoomRent) # p-value < 2.2e-16
## Warning in chisq.test(HasSwimmingPool, RoomRent): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: HasSwimmingPool and RoomRent
## X-squared = 8387.9, df = 2153, p-value < 2.2e-16
# Welch t-test of mean RoomRent between the two HasSwimmingPool groups. The
# earlier call, t.test(RoomRent, HasSwimmingPool), compared the mean rent with
# the mean of the 0/1 flag itself, and its inline "p-value= 0.3365" note did
# not match its own output. The formula form matches the FreeWifi and
# FreeBreakfast tests in this script.
t.test(RoomRent ~ HasSwimmingPool, data = hotel)
# TODO: re-run and record the output of the corrected t.test(); the transcript
# of the superseded two-sample call was removed.
# ---- StarRating vs. FreeWifi (0/1 flag) ----
boxplot(
  StarRating ~ FreeWifi,
  data = hotel,
  horizontal = TRUE,
  main = "StarRating vs Freewifi",
  xlab = "StarRating",
  ylab = "Freewifi"
)
with(hotel, scatterplot(FreeWifi, StarRating))
# Mean star rating for listings without (0) and with (1) free wifi.
with(hotel, aggregate(StarRating, by = list(FreeWifi), FUN = mean))
## Group.1 x
## 1 0 3.410805
## 2 1 3.463089
# Chi-squared test of independence; the recorded p-value is < 2.2e-16.
with(hotel, chisq.test(StarRating, FreeWifi))
## Warning in chisq.test(StarRating, FreeWifi): Chi-squared approximation may
## be incorrect
##
## Pearson's Chi-squared test
##
## data: StarRating and FreeWifi
## X-squared = 217.34, df = 20, p-value < 2.2e-16
# ---- StarRating vs. FreeBreakfast (0/1 flag) ----
boxplot(
  StarRating ~ FreeBreakfast,
  data = hotel,
  main = "StarRating vs FreeBreakfast",
  horizontal = TRUE,
  xlab = "StarRating",
  ylab = "FreeBreakfast"
)
# StarRating goes on the y-axis, as in the boxplot formula above and in the
# other StarRating scatterplots in this script (the arguments were previously
# passed in the opposite order here).
scatterplot(FreeBreakfast, StarRating)
# Mean star rating for listings without (0) and with (1) free breakfast.
aggregate(StarRating, by = list(FreeBreakfast), mean)
## Group.1 x
## 1 0 3.492763
## 2 1 3.441056
# Chi-squared test of independence; the recorded p-value is < 2.2e-16.
chisq.test(StarRating, FreeBreakfast)
## Warning in chisq.test(StarRating, FreeBreakfast): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: StarRating and FreeBreakfast
## X-squared = 435.38, df = 20, p-value < 2.2e-16
# ---- StarRating vs. HotelCapacity: plots and per-capacity mean rating ----
boxplot(
  StarRating ~ HotelCapacity,
  data = hotel,
  horizontal = TRUE,
  main = "StarRating vs HotelCapacity",
  xlab = "StarRating",
  ylab = "HotelCapacity"
)
with(hotel, scatterplot(HotelCapacity, StarRating))
# One row per distinct HotelCapacity value, with the mean StarRating in
# column x.
with(hotel, aggregate(StarRating, by = list(HotelCapacity), FUN = mean))
## Group.1 x
## 1 1 3.750000
## 2 2 4.000000
## 3 3 2.500000
## 4 4 2.875000
## 5 5 2.923077
## 6 6 2.722222
## 7 7 2.625000
## 8 8 2.868421
## 9 9 3.090909
## 10 10 2.986564
## 11 11 3.133333
## 12 12 2.928571
## 13 13 2.900000
## 14 14 2.800000
## 15 15 2.972503
## 16 16 3.000000
## 17 17 3.000000
## 18 18 2.904762
## 19 19 3.079882
## 20 20 3.094595
## 21 21 3.081818
## 22 22 3.083333
## 23 23 2.968750
## 24 24 3.148789
## 25 25 3.050000
## 26 26 3.027778
## 27 27 3.275000
## 28 28 3.136364
## 29 29 3.511628
## 30 30 3.332432
## 31 31 3.470588
## 32 32 3.242424
## 33 33 3.230769
## 34 34 3.233333
## 35 35 3.000000
## 36 36 3.223810
## 37 37 3.285714
## 38 38 3.200000
## 39 39 3.416667
## 40 40 3.468750
## 41 41 3.416667
## 42 42 3.333333
## 43 43 3.726027
## 44 44 3.375000
## 45 45 3.222222
## 46 46 3.444444
## 47 47 3.666667
## 48 48 3.475000
## 49 49 3.166667
## 50 50 3.653846
## 51 51 3.700000
## 52 52 3.500000
## 53 53 3.333333
## 54 54 3.466667
## 55 55 3.888889
## 56 56 3.750000
## 57 57 3.600000
## 58 58 3.800000
## 59 59 4.000000
## 60 60 3.406250
## 61 61 3.900000
## 62 62 3.833333
## 63 63 4.000000
## 64 64 3.842857
## 65 65 3.672727
## 66 66 3.600000
## 67 67 3.550000
## 68 68 3.250000
## 69 69 3.666667
## 70 70 3.555556
## 71 71 3.266667
## 72 72 3.611111
## 73 73 3.375000
## 74 74 3.250000
## 75 75 4.500000
## 76 76 3.800000
## 77 77 3.833333
## 78 78 4.300000
## 79 79 3.833333
## 80 80 3.754545
## 81 81 3.875000
## 82 82 3.600000
## 83 83 3.950000
## 84 84 4.333333
## 85 85 4.312500
## 86 86 4.000000
## 87 87 4.250000
## 88 88 3.916667
## 89 89 4.000000
## 90 90 4.000000
## 91 91 3.000000
## 92 92 3.666667
## 93 93 4.000000
## 94 94 3.875000
## 95 95 4.000000
## 96 96 4.000000
## 97 97 4.500000
## 98 98 4.083333
## 99 99 4.125000
## 100 100 3.697436
## 101 101 3.500000
## 102 102 4.300000
## 103 103 3.500000
## 104 104 4.250000
## 105 106 4.000000
## 106 107 3.666667
## 107 108 4.250000
## 108 109 3.750000
## 109 110 4.166667
## 110 111 4.000000
## 111 112 4.125000
## 112 113 4.000000
## 113 114 4.000000
## 114 115 4.000000
## 115 117 4.000000
## 116 119 4.333333
## 117 120 4.000000
## 118 121 3.900000
## 119 122 4.000000
## 120 124 5.000000
## 121 126 4.500000
## 122 127 4.500000
## 123 128 4.000000
## 124 129 4.300000
## 125 130 4.112903
## 126 132 4.214286
## 127 133 4.000000
## 128 134 4.000000
## 129 135 4.000000
## 130 136 5.000000
## 131 137 4.625000
## 132 138 4.000000
## 133 139 4.500000
## 134 140 4.300000
## 135 141 4.075000
## 136 142 4.333333
## 137 144 3.750000
## 138 145 4.666667
## 139 147 4.000000
## 140 148 4.000000
## 141 149 3.833333
## 142 150 3.716667
## 143 151 3.433333
## 144 153 4.000000
## 145 154 4.666667
## 146 155 3.950000
## 147 159 4.000000
## 148 160 4.333333
## 149 162 4.000000
## 150 164 4.666667
## 151 165 3.500000
## 152 166 4.000000
## 153 167 4.500000
## 154 170 4.500000
## 155 171 4.500000
## 156 172 4.166667
## 157 173 3.750000
## 158 176 4.000000
## 159 177 3.750000
## 160 178 4.625000
## 161 179 5.000000
## 162 180 3.500000
## 163 181 4.333333
## 164 182 5.000000
## 165 183 4.000000
## 166 184 4.500000
## 167 187 5.000000
## 168 189 5.000000
## 169 190 4.000000
## 170 191 5.000000
## 171 195 5.000000
## 172 196 3.000000
## 173 197 4.000000
## 174 198 4.000000
## 175 199 5.000000
## 176 200 4.500000
## 177 201 5.000000
## 178 202 5.000000
## 179 203 5.000000
## 180 204 4.500000
## 181 205 3.000000
## 182 207 4.000000
## 183 208 5.000000
## 184 209 4.500000
## 185 210 5.000000
## 186 211 4.666667
## 187 212 5.000000
## 188 214 3.833333
## 189 215 5.000000
## 190 216 5.000000
## 191 218 5.000000
## 192 220 4.333333
## 193 222 5.000000
## 194 223 4.500000
## 195 228 5.000000
## 196 229 5.000000
## 197 230 4.000000
## 198 231 5.000000
## 199 232 4.000000
## 200 233 5.000000
## 201 234 5.000000
## 202 235 5.000000
## 203 236 4.000000
## 204 237 5.000000
## 205 240 3.200000
## 206 244 4.500000
## 207 247 5.000000
## 208 248 4.500000
## 209 251 4.700000
## 210 254 5.000000
## 211 255 5.000000
## 212 260 4.300000
## 213 261 5.000000
## 214 264 5.000000
## 215 267 4.000000
## 216 269 5.000000
## 217 270 5.000000
## 218 273 5.000000
## 219 279 5.000000
## 220 281 5.000000
## 221 286 5.000000
## 222 287 5.000000
## 223 292 5.000000
## 224 293 5.000000
## 225 302 5.000000
## 226 310 5.000000
## 227 311 4.000000
## 228 317 5.000000
## 229 323 5.000000
## 230 324 5.000000
## 231 326 5.000000
## 232 327 5.000000
## 233 334 4.000000
## 234 340 5.000000
## 235 341 5.000000
## 236 385 5.000000
## 237 390 5.000000
## 238 393 5.000000
## 239 400 5.000000
## 240 403 5.000000
## 241 411 5.000000
## 242 414 5.000000
## 243 419 4.000000
## 244 436 5.000000
## 245 451 5.000000
## 246 461 5.000000
## 247 480 5.000000
## 248 493 5.000000
## 249 507 5.000000
## 250 523 5.000000
## 251 550 5.000000
## 252 560 5.000000
## 253 600 5.000000
# Pearson chi-squared test of independence between StarRating and HotelCapacity.
# NOTE(review): the recorded run warns that the chi-squared approximation may be
# incorrect and reports df = 5040, so the contingency table is very large;
# presumably many cells have small expected counts -- treat the p-value with care.
chisq.test(StarRating,HotelCapacity) # p-value < 2.2e-16
## Warning in chisq.test(StarRating, HotelCapacity): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: StarRating and HotelCapacity
## X-squared = 77127, df = 5040, p-value < 2.2e-16
# Horizontal boxplots of StarRating, one box per HasSwimmingPool group.
# The group-axis label previously read "HasSwimmimgPool" (typo); it now matches
# the column name.
boxplot(
  StarRating ~ HasSwimmingPool,
  data = hotel,
  main = "StarRating vs HasSwimmingPool",
  horizontal = TRUE,
  xlab = "StarRating",
  ylab = "HasSwimmingPool"
)
# Scatterplot of StarRating (y) against HasSwimmingPool (x).
scatterplot(HasSwimmingPool, StarRating)
# Mean StarRating within each HasSwimmingPool group.
aggregate(
  StarRating,
  by = list(HasSwimmingPool),
  FUN = mean
)
## Group.1 x
## 1 0 3.111602
## 2 1 4.087978
# Pearson chi-squared test of independence between StarRating and
# HasSwimmingPool. The recorded run reports df = 20 and warns that the
# chi-squared approximation may be incorrect.
chisq.test(StarRating,HasSwimmingPool) # p-value < 2.2e-16
## Warning in chisq.test(StarRating, HasSwimmingPool): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: StarRating and HasSwimmingPool
## X-squared = 5804.9, df = 20, p-value < 2.2e-16
# Scatterplot matrix of RoomRent together with the listed columns of `hotel`.
# The recorded run emits "could not fit smooth" warnings for some panels.
scatterplotMatrix(
  ~ RoomRent + CityRank + Population + IsMetroCity + IsTouristDestination +
    IsWeekend + IsNewYearEve,
  data = hotel,
  main = "Scatterplot for Various Variables"
)
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
# Scatterplot matrix of RoomRent together with the listed columns of `hotel`.
# The recorded run emits "could not fit smooth" warnings for some panels.
scatterplotMatrix(
  ~ RoomRent + StarRating + Airport + FreeWifi + FreeBreakfast +
    HotelCapacity + HasSwimmingPool,
  data = hotel,
  main = "Scatterplot for Various Variables"
)
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
library(corrgram)
# Correlogram of the `hotel` data frame: shaded cells below the diagonal,
# pie glyphs above it, variable names on the diagonal.
corrgram(
  hotel,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Hotel Rent Analysis Correlogram"
)
# Pairwise correlation matrix. Columns are selected by name instead of by
# position (previously c(3:8, 11:13, 17:21)) so the selection stays correct if
# the column order of `hotel` ever changes; the order below reproduces the
# original positional selection.
a <- cor(hotel[, c(
  "Population", "CityRank", "IsMetroCity", "IsTouristDestination",
  "IsWeekend", "IsNewYearEve",
  "RoomRent", "StarRating", "Airport",
  "FreeWifi", "FreeBreakfast", "HotelCapacity", "HasSwimmingPool",
  "RentCategory"
)])
round(a, 2)
## Population CityRank IsMetroCity IsTouristDestination
## Population 1.00 -0.84 0.77 -0.05
## CityRank -0.84 1.00 -0.56 0.28
## IsMetroCity 0.77 -0.56 1.00 0.18
## IsTouristDestination -0.05 0.28 0.18 1.00
## IsWeekend 0.01 -0.01 0.00 -0.02
## IsNewYearEve 0.00 0.00 0.00 0.00
## RoomRent -0.09 0.09 -0.07 0.12
## StarRating 0.13 -0.13 0.08 -0.04
## Airport -0.26 0.51 -0.21 0.19
## FreeWifi 0.11 -0.12 0.09 -0.06
## FreeBreakfast 0.04 -0.01 0.05 -0.07
## HotelCapacity 0.26 -0.26 0.19 -0.10
## HasSwimmingPool 0.03 -0.10 0.02 0.04
## RentCategory -0.08 0.15 -0.04 0.17
## IsWeekend IsNewYearEve RoomRent StarRating Airport
## Population 0.01 0.00 -0.09 0.13 -0.26
## CityRank -0.01 0.00 0.09 -0.13 0.51
## IsMetroCity 0.00 0.00 -0.07 0.08 -0.21
## IsTouristDestination -0.02 0.00 0.12 -0.04 0.19
## IsWeekend 1.00 0.29 0.00 0.01 0.00
## IsNewYearEve 0.29 1.00 0.04 0.00 0.00
## RoomRent 0.00 0.04 1.00 0.37 0.05
## StarRating 0.01 0.00 0.37 1.00 -0.06
## Airport 0.00 0.00 0.05 -0.06 1.00
## FreeWifi 0.00 0.00 0.00 0.02 -0.09
## FreeBreakfast -0.01 0.00 -0.01 -0.03 0.02
## HotelCapacity 0.01 0.00 0.16 0.64 -0.12
## HasSwimmingPool 0.00 0.00 0.31 0.62 -0.14
## RentCategory 0.01 0.04 0.60 0.55 0.15
## FreeWifi FreeBreakfast HotelCapacity HasSwimmingPool
## Population 0.11 0.04 0.26 0.03
## CityRank -0.12 -0.01 -0.26 -0.10
## IsMetroCity 0.09 0.05 0.19 0.02
## IsTouristDestination -0.06 -0.07 -0.10 0.04
## IsWeekend 0.00 -0.01 0.01 0.00
## IsNewYearEve 0.00 0.00 0.00 0.00
## RoomRent 0.00 -0.01 0.16 0.31
## StarRating 0.02 -0.03 0.64 0.62
## Airport -0.09 0.02 -0.12 -0.14
## FreeWifi 1.00 0.16 -0.01 -0.02
## FreeBreakfast 0.16 1.00 -0.09 -0.06
## HotelCapacity -0.01 -0.09 1.00 0.51
## HasSwimmingPool -0.02 -0.06 0.51 1.00
## RentCategory -0.04 0.03 0.36 0.47
## RentCategory
## Population -0.08
## CityRank 0.15
## IsMetroCity -0.04
## IsTouristDestination 0.17
## IsWeekend 0.01
## IsNewYearEve 0.04
## RoomRent 0.60
## StarRating 0.55
## Airport 0.15
## FreeWifi -0.04
## FreeBreakfast 0.03
## HotelCapacity 0.36
## HasSwimmingPool 0.47
## RentCategory 1.00
# List the column names of `hotel` (for a data frame, names() and colnames()
# return the same character vector).
names(hotel)
## [1] "X" "CityName" "Population"
## [4] "CityRank" "IsMetroCity" "IsTouristDestination"
## [7] "IsWeekend" "IsNewYearEve" "Date"
## [10] "HotelName" "RoomRent" "StarRating"
## [13] "Airport" "HotelAddress" "HotelPincode"
## [16] "HotelDescription" "FreeWifi" "FreeBreakfast"
## [19] "HotelCapacity" "HasSwimmingPool" "RentCategory"
# M1: linear model of RoomRent on all twelve candidate predictors.
# No `data` argument is supplied, so the variables are resolved from the
# calling environment / search path.
M1 <- lm(
  RoomRent ~ Population + CityRank + IsMetroCity + IsTouristDestination +
    IsWeekend + IsNewYearEve + StarRating + Airport + FreeBreakfast +
    FreeWifi + HotelCapacity + HasSwimmingPool
)
summary(M1)
##
## Call:
## lm(formula = RoomRent ~ Population + CityRank + IsMetroCity +
## IsTouristDestination + IsWeekend + IsNewYearEve + StarRating +
## Airport + FreeBreakfast + FreeWifi + HotelCapacity + HasSwimmingPool)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11847 -2358 -691 1030 309691
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.605e+03 4.495e+02 -19.143 < 2e-16 ***
## Population -1.186e-04 3.593e-05 -3.300 0.000969 ***
## CityRank 1.983e+00 1.036e+01 0.191 0.848170
## IsMetroCity -6.633e+02 2.165e+02 -3.064 0.002189 **
## IsTouristDestination 1.922e+03 1.483e+02 12.962 < 2e-16 ***
## IsWeekend -8.992e+01 1.239e+02 -0.726 0.468058
## IsNewYearEve 8.827e+02 1.819e+02 4.852 1.24e-06 ***
## StarRating 3.592e+03 1.108e+02 32.424 < 2e-16 ***
## Airport 9.473e+00 3.173e+00 2.985 0.002839 **
## FreeBreakfast 1.693e+02 1.234e+02 1.372 0.169948
## FreeWifi 5.502e+02 2.243e+02 2.453 0.014183 *
## HotelCapacity -1.028e+01 1.034e+00 -9.945 < 2e-16 ***
## HasSwimmingPool 2.153e+03 1.616e+02 13.322 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6602 on 13211 degrees of freedom
## Multiple R-squared: 0.1905, Adjusted R-squared: 0.1897
## F-statistic: 259 on 12 and 13211 DF, p-value: < 2.2e-16
# M2: refit M1 after dropping CityRank, IsWeekend, FreeBreakfast and FreeWifi.
M2 <- update(
  M1,
  . ~ . - CityRank - IsWeekend - FreeBreakfast - FreeWifi
)
summary(M2)
##
## Call:
## lm(formula = RoomRent ~ Population + IsMetroCity + IsTouristDestination +
## IsNewYearEve + StarRating + Airport + HotelCapacity + HasSwimmingPool)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11798 -2359 -704 1034 309572
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.036e+03 3.563e+02 -22.556 < 2e-16 ***
## Population -1.217e-04 2.262e-05 -5.378 7.66e-08 ***
## IsMetroCity -6.261e+02 2.133e+02 -2.936 0.003333 **
## IsTouristDestination 1.898e+03 1.374e+02 13.814 < 2e-16 ***
## IsNewYearEve 8.433e+02 1.740e+02 4.846 1.27e-06 ***
## StarRating 3.613e+03 1.104e+02 32.733 < 2e-16 ***
## Airport 9.533e+00 2.712e+00 3.515 0.000441 ***
## HotelCapacity -1.055e+01 1.028e+00 -10.269 < 2e-16 ***
## HasSwimmingPool 2.133e+03 1.598e+02 13.346 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6604 on 13215 degrees of freedom
## Multiple R-squared: 0.1899, Adjusted R-squared: 0.1894
## F-statistic: 387.2 on 8 and 13215 DF, p-value: < 2.2e-16
library(leaps)
# Subset search over nine candidate predictors of RoomRent, keeping up to 10
# candidate subsets per subset size (nbest = 10).
# NOTE(review): this rebinds `M2`, replacing the reduced lm fit created by
# update(M1, ...) above with a regsubsets object -- confirm nothing later
# expects `M2` to still be the linear model.
M2 <- regsubsets(
  RoomRent ~ Population + IsTouristDestination + IsWeekend + IsNewYearEve +
    StarRating + Airport + HotelCapacity + FreeWifi + FreeBreakfast,
  data = hotel,
  nbest = 10
)
# Plot the candidate subsets ranked by adjusted R-squared.
plot(M2, scale = "adjr2")
# M3: the full model M1 with IsWeekend removed. Refitting through update()
# keeps the remaining eleven terms in M1's order, so the fitted model and the
# printed Call match an explicit lm() on the same formula.
M3 <- update(M1, . ~ . - IsWeekend)
summary(M3)
##
## Call:
## lm(formula = RoomRent ~ Population + CityRank + IsMetroCity +
## IsTouristDestination + IsNewYearEve + StarRating + Airport +
## FreeBreakfast + FreeWifi + HotelCapacity + HasSwimmingPool)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11847 -2362 -695 1024 309652
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.654e+03 4.444e+02 -19.476 < 2e-16 ***
## Population -1.191e-04 3.593e-05 -3.315 0.000918 ***
## CityRank 1.861e+00 1.036e+01 0.180 0.857399
## IsMetroCity -6.622e+02 2.165e+02 -3.059 0.002228 **
## IsTouristDestination 1.925e+03 1.483e+02 12.981 < 2e-16 ***
## IsNewYearEve 8.440e+02 1.740e+02 4.852 1.24e-06 ***
## StarRating 3.592e+03 1.108e+02 32.425 < 2e-16 ***
## Airport 9.482e+00 3.173e+00 2.988 0.002810 **
## FreeBreakfast 1.702e+02 1.234e+02 1.380 0.167660
## FreeWifi 5.497e+02 2.243e+02 2.451 0.014253 *
## HotelCapacity -1.028e+01 1.034e+00 -9.943 < 2e-16 ***
## HasSwimmingPool 2.152e+03 1.616e+02 13.317 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6602 on 13212 degrees of freedom
## Multiple R-squared: 0.1904, Adjusted R-squared: 0.1898
## F-statistic: 282.5 on 11 and 13212 DF, p-value: < 2.2e-16