Project Title : Analysis of Room Rent of Hotels in 42 Cities

NAME : Devesh Patidar

EMAIL : devesh.bilpank@gmail.com

COLLEGE : IIT Bombay

Reading and Subsetting the Data

cities42 <- read.csv(paste("Cities42.csv",sep = ""))
View(cities42)
dim(cities42)
## [1] 13232    20
summary(cities42)
##        X              CityName      Population          CityRank    
##  Min.   :    1   Delhi    :2048   Min.   :    8096   Min.   : 0.00  
##  1st Qu.: 3309   Jaipur   : 768   1st Qu.:  744983   1st Qu.: 2.00  
##  Median : 6616   Mumbai   : 712   Median : 3046163   Median : 9.00  
##  Mean   : 6616   Bangalore: 656   Mean   : 4416837   Mean   :14.83  
##  3rd Qu.: 9924   Goa      : 624   3rd Qu.: 8443675   3rd Qu.:24.00  
##  Max.   :13232   Kochi    : 608   Max.   :12442373   Max.   :44.00  
##                  (Other)  :7816                                     
##   IsMetroCity     IsTouristDestination   IsWeekend       IsNewYearEve   
##  Min.   :0.0000   Min.   :0.0000       Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000       1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000       Median :1.0000   Median :0.0000  
##  Mean   :0.2842   Mean   :0.6972       Mean   :0.6228   Mean   :0.1244  
##  3rd Qu.:1.0000   3rd Qu.:1.0000       3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000       Max.   :1.0000   Max.   :1.0000  
##                                                                         
##           Date                       HotelName        RoomRent     
##  Dec 21 2016:1611   Vivanta by Taj        :   32   Min.   :   299  
##  Dec 24 2016:1611   Goldfinch Hotel       :   24   1st Qu.:  2436  
##  Dec 25 2016:1611   OYO Rooms             :   24   Median :  4000  
##  Dec 28 2016:1611   The Gordon House Hotel:   24   Mean   :  5474  
##  Dec 31 2016:1611   Apnayt Villa          :   16   3rd Qu.:  6299  
##  Dec 18 2016:1608   Bentleys Hotel Colaba :   16   Max.   :322500  
##  (Other)    :3569   (Other)               :13096                   
##    StarRating       Airport      
##  Min.   :0.000   Min.   :  0.20  
##  1st Qu.:3.000   1st Qu.:  8.40  
##  Median :3.000   Median : 15.00  
##  Mean   :3.459   Mean   : 21.16  
##  3rd Qu.:4.000   3rd Qu.: 24.00  
##  Max.   :5.000   Max.   :124.00  
##                                  
##                                                                    HotelAddress  
##  The Mall, Shimla                                                        :   32  
##  #2-91/14/8, White Fields, Kondapur, Hitech City, Hyderabad, 500084 India:   16  
##  121, City Terrace, Walchand Hirachand Marg, Mumbai, Maharashtra         :   16  
##  14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta               :   16  
##  144/7, Rajiv Gandi Salai (OMR), Kottivakkam, Chennai, Tamil Nadu        :   16  
##  17, Oliver Road, Colaba, Mumbai, Maharashtra                            :   16  
##  (Other)                                                                 :13120  
##   HotelPincode         HotelDescription    FreeWifi      FreeBreakfast   
##  Min.   : 100025   3           :  120   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 221001   Abc         :  112   1st Qu.:1.0000   1st Qu.:0.0000  
##  Median : 395003   3-star hotel:  104   Median :1.0000   Median :1.0000  
##  Mean   : 397430   3.5         :   88   Mean   :0.9259   Mean   :0.6491  
##  3rd Qu.: 570001   4           :   72   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :7000157   (Other)     :12728   Max.   :1.0000   Max.   :1.0000  
##                    NA's        :    8                                    
##  HotelCapacity    HasSwimmingPool 
##  Min.   :  0.00   Min.   :0.0000  
##  1st Qu.: 16.00   1st Qu.:0.0000  
##  Median : 34.00   Median :0.0000  
##  Mean   : 62.51   Mean   :0.3558  
##  3rd Qu.: 75.00   3rd Qu.:1.0000  
##  Max.   :600.00   Max.   :1.0000  
## 
hotel <- subset(cities42, HotelCapacity!=0) #Removing bad data
View(hotel)
dim(hotel)
## [1] 13224    20
attach(hotel)

Categorising the Room Rent on a Scale of 5

summary(hotel)
##        X              CityName      Population          CityRank    
##  Min.   :    1   Delhi    :2048   Min.   :    8096   Min.   : 0.00  
##  1st Qu.: 3307   Jaipur   : 768   1st Qu.:  744983   1st Qu.: 2.00  
##  Median : 6612   Mumbai   : 712   Median : 3046163   Median : 9.00  
##  Mean   : 6614   Bangalore: 656   Mean   : 4419207   Mean   :14.83  
##  3rd Qu.: 9918   Goa      : 624   3rd Qu.: 8443675   3rd Qu.:24.00  
##  Max.   :13232   Kochi    : 608   Max.   :12442373   Max.   :44.00  
##                  (Other)  :7808                                     
##   IsMetroCity     IsTouristDestination   IsWeekend       IsNewYearEve   
##  Min.   :0.0000   Min.   :0.0000       Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000       1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000       Median :1.0000   Median :0.0000  
##  Mean   :0.2843   Mean   :0.6976       Mean   :0.6226   Mean   :0.1244  
##  3rd Qu.:1.0000   3rd Qu.:1.0000       3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000       Max.   :1.0000   Max.   :1.0000  
##                                                                         
##           Date                       HotelName        RoomRent     
##  Dec 21 2016:1610   Vivanta by Taj        :   32   Min.   :   299  
##  Dec 24 2016:1610   Goldfinch Hotel       :   24   1st Qu.:  2439  
##  Dec 25 2016:1610   OYO Rooms             :   24   Median :  4000  
##  Dec 28 2016:1610   The Gordon House Hotel:   24   Mean   :  5476  
##  Dec 31 2016:1610   Apnayt Villa          :   16   3rd Qu.:  6300  
##  Dec 18 2016:1607   Bentleys Hotel Colaba :   16   Max.   :322500  
##  (Other)    :3567   (Other)               :13088                   
##    StarRating       Airport      
##  Min.   :0.000   Min.   :  0.20  
##  1st Qu.:3.000   1st Qu.:  8.40  
##  Median :3.000   Median : 15.00  
##  Mean   :3.459   Mean   : 21.16  
##  3rd Qu.:4.000   3rd Qu.: 24.00  
##  Max.   :5.000   Max.   :124.00  
##                                  
##                                                                    HotelAddress  
##  The Mall, Shimla                                                        :   32  
##  #2-91/14/8, White Fields, Kondapur, Hitech City, Hyderabad, 500084 India:   16  
##  121, City Terrace, Walchand Hirachand Marg, Mumbai, Maharashtra         :   16  
##  14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta               :   16  
##  144/7, Rajiv Gandi Salai (OMR), Kottivakkam, Chennai, Tamil Nadu        :   16  
##  17, Oliver Road, Colaba, Mumbai, Maharashtra                            :   16  
##  (Other)                                                                 :13112  
##   HotelPincode         HotelDescription    FreeWifi      FreeBreakfast   
##  Min.   : 100025   3           :  120   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 221001   Abc         :  112   1st Qu.:1.0000   1st Qu.:0.0000  
##  Median : 395003   3-star hotel:  104   Median :1.0000   Median :1.0000  
##  Mean   : 397323   3.5         :   88   Mean   :0.9258   Mean   :0.6489  
##  3rd Qu.: 562110   4           :   72   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :7000157   (Other)     :12720   Max.   :1.0000   Max.   :1.0000  
##                    NA's        :    8                                    
##  HotelCapacity    HasSwimmingPool
##  Min.   :  1.00   Min.   :0.000  
##  1st Qu.: 16.00   1st Qu.:0.000  
##  Median : 34.00   Median :0.000  
##  Mean   : 62.55   Mean   :0.356  
##  3rd Qu.: 75.00   3rd Qu.:1.000  
##  Max.   :600.00   Max.   :1.000  
## 
hotel$RentCategory <- ifelse(RoomRent<= 2500, 1,ifelse(RoomRent<= 5000, 2, ifelse(RoomRent<= 7500, 3, ifelse(RoomRent<= 10000, 4, 5))))
View(hotel)
boxplot(log10(RoomRent)) 

table(CityName)
## CityName
##             Agra        Ahmedabad         Amritsar        Bangalore 
##              432              424              136              656 
##      Bhubaneswar       Chandigarh          Chennai       Darjeeling 
##              120              336              416              136 
##            Delhi          Gangtok              Goa         Guwahati 
##             2048              128              624               48 
##         Haridwar        Hyderabad           Indore           Jaipur 
##               48              536              160              768 
##        Jaisalmer          Jodhpur           Kanpur            Kochi 
##              264              224               16              608 
##          Kolkata          Lucknow          Madurai           Manali 
##              512              128              112              288 
##        Mangalore           Mumbai           Munnar           Mysore 
##               96              712              328              160 
##         Nainital             Ooty        Panchkula             Pune 
##              144              136               64              600 
##             Puri           Rajkot        Rishikesh           Shimla 
##               56              128               88              280 
##         Srinagar            Surat Thiruvanthipuram         Thrissur 
##               40               80              392               32 
##          Udaipur         Varanasi 
##              456              264
table(Date)
## Date
##   18-Dec-16   21-Dec-16   24-Dec-16   25-Dec-16   28-Dec-16   31-Dec-16 
##          44          44          44          44          44          44 
##    4-Jan-16    4-Jan-17    8-Jan-16    8-Jan-17 Dec 18 2016 Dec 21 2016 
##          31          13          31          13        1607        1610 
## Dec 24 2016 Dec 25 2016 Dec 28 2016 Dec 31 2016 Jan 04 2017 Jan 08 2017 
##        1610        1610        1610        1610        1547        1541 
##  Jan 4 2017  Jan 8 2017 
##          60          67
table(IsTouristDestination)
## IsTouristDestination
##    0    1 
## 3999 9225

Relation of RoomRent with other variables

RoomRent vs CityName

library(car)
par(mfrow=c(1,1))
boxplot(log10(RoomRent)~CityName,data=hotel, main="Room Rent vs. City Name", ylab="City Name", xlab="Room Rent", horizontal=TRUE)

scatterplot(CityName,log10(RoomRent))

##   [1] "7241"  "7242"  "7243"  "7244"  "7245"  "7246"  "7247"  "7248" 
##   [9] "7281"  "7470"  "7617"  "7618"  "7619"  "7620"  "7621"  "7622" 
##  [17] "7623"  "7624"  "4402"  "4407"  "4408"  "4777"  "10090" "10093"
##  [25] "9147"  "9162"  "9338"  "9339"  "9342"  "11777" "11778" "11779"
##  [33] "11780" "11781" "11747" "11748" "11750" "11763" "11764" "11766"
##  [41] "11771" "11772" "11773" "11774" "2038"  "2040"  "2182"  "2425" 
##  [49] "2430"  "8247"  "8310"  "8333"  "8335"  "8363"  "8365"  "8366" 
##  [57] "4233"  "4234"  "4235"  "4236"  "4237"  "4238"  "4239"  "4240" 
##  [65] "6152"  "6533"  "6534"  "6535"  "6536"  "6537"  "6538"  "6539" 
##  [73] "6488"  "6489"  "6490"  "9034"  "9035"  "9036"  "9037"  "9038" 
##  [81] "9039"  "10686" "10693" "10694" "10699" "10701" "10702" "7686" 
##  [89] "27"    "182"   "590"   "646"   "12704" "12745" "12752" "11702"
##  [97] "11921" "11927" "11928" "9662"  "9661"  "9663"  "9755"  "9757" 
## [105] "9758"  "9659"  "9660"  "9579"  "9580"  "6761"  "6762"  "6763" 
## [113] "6757"  "6758"  "8657"  "8658"  "8660"  "8663"  "8664"  "8769" 
## [121] "8770"  "8772"  "8776"  "8659"

RoomRent vs Population

boxplot(log10(RoomRent)~Population,data=hotel, main="Room Rent vs. Population", horizontal=TRUE,xlab="log10(RoomRent)",ylab="Population")

scatterplot(Population,log10(RoomRent))

chisq.test(Population,RoomRent) # p-value < 2.2e-16
## Warning in chisq.test(Population, RoomRent): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Population and RoomRent
## X-squared = 246000, df = 92579, p-value < 2.2e-16
t.test(Population,RoomRent) #p-value < 2.2e-16
## 
##  Welch Two Sample t-test
## 
## data:  Population and RoomRent
## t = 119.18, df = 13223, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  4341141 4486320
## sample estimates:
##   mean of x   mean of y 
## 4419206.714    5476.028

RoomRent vs MetroCity

boxplot(log10(RoomRent)~IsMetroCity,data=hotel, main="Room Rent vs. MetroCity", horizontal=TRUE,xlab="log10(RoomRent)",ylab="IsMetrocity")

scatterplot(IsMetroCity,log10(RoomRent))

aggregate(RoomRent, by=list(IsMetroCity), mean)
##   Group.1        x
## 1       0 5785.900
## 2       1 4696.073
chisq.test(IsMetroCity,RoomRent) #p-value < 2.2e-16
## Warning in chisq.test(IsMetroCity, RoomRent): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  IsMetroCity and RoomRent
## X-squared = 6441.3, df = 2153, p-value < 2.2e-16
t.test(IsMetroCity,RoomRent) #p-value < 2.2e-16
## 
##  Welch Two Sample t-test
## 
## data:  IsMetroCity and RoomRent
## t = -85.848, df = 13223, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5600.769 -5350.718
## sample estimates:
##    mean of x    mean of y 
##    0.2843315 5476.0278282

RoomRent vs Tourist Destination

boxplot(log10(RoomRent)~IsTouristDestination,data=hotel, main="Room Rent vs. Tourist Destination", horizontal=TRUE,xlab="log10(RoomRent)",ylab="IsTouristDestination")

scatterplot(IsTouristDestination,log10(RoomRent))

aggregate(RoomRent, by=list(IsTouristDestination), mean)
##   Group.1        x
## 1       0 4115.009
## 2       1 6066.024
chisq.test(IsTouristDestination,RoomRent) #p-value < 2.2e-16
## Warning in chisq.test(IsTouristDestination, RoomRent): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  IsTouristDestination and RoomRent
## X-squared = 6402.6, df = 2153, p-value < 2.2e-16
t.test(IsTouristDestination,RoomRent) #p-value < 2.2e-16
## 
##  Welch Two Sample t-test
## 
## data:  IsTouristDestination and RoomRent
## t = -85.842, df = 13223, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5600.356 -5350.305
## sample estimates:
##    mean of x    mean of y 
##    0.6975953 5476.0278282

RoomRent vs Weekend

boxplot(log10(RoomRent)~IsWeekend,data=hotel, main="Room Rent vs. Weekend", horizontal=TRUE,xlab="log10(RoomRent)",ylab="IsWeekend")

scatterplot(IsWeekend,log10(RoomRent))

aggregate(RoomRent, by=list(IsWeekend), mean)
##   Group.1        x
## 1       0 5430.835
## 2       1 5503.424
chisq.test(IsWeekend,RoomRent) #p-value = 1
## Warning in chisq.test(IsWeekend, RoomRent): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  IsWeekend and RoomRent
## X-squared = 1583.5, df = 2153, p-value = 1
t.test(IsWeekend,RoomRent) #p-value < 2.2e-16
## 
##  Welch Two Sample t-test
## 
## data:  IsWeekend and RoomRent
## t = -85.843, df = 13223, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5600.431 -5350.380
## sample estimates:
##    mean of x    mean of y 
##    0.6225802 5476.0278282

RoomRent vs NewYearEve

boxplot(log10(RoomRent)~IsNewYearEve,data=hotel, main="Room Rent vs. NewYearEve", horizontal=TRUE,xlab="log10(RoomRent)",ylab="IsNewYearEve")

scatterplot(IsNewYearEve,log10(RoomRent))

aggregate(RoomRent, by=list(IsNewYearEve), mean)
##   Group.1        x
## 1       0 5369.594
## 2       1 6225.203
chisq.test(IsNewYearEve,RoomRent) #p-value = 0.9474
## Warning in chisq.test(IsNewYearEve, RoomRent): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  IsNewYearEve and RoomRent
## X-squared = 2047.8, df = 2153, p-value = 0.9474
t.test(IsNewYearEve,RoomRent) #p-value < 2.2e-16
## 
##  Welch Two Sample t-test
## 
## data:  IsNewYearEve and RoomRent
## t = -85.851, df = 13223, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5600.929 -5350.878
## sample estimates:
##   mean of x   mean of y 
##    0.124395 5476.027828

RoomRent vs StarRating

boxplot(log10(RoomRent)~StarRating,data=hotel, main="Room Rent vs. StarRating", horizontal=TRUE,xlab="log10(RoomRent)",ylab="StarRating")

scatterplot(StarRating,log10(RoomRent))

aggregate(RoomRent, by=list(StarRating), mean)
##    Group.1         x
## 1      0.0  7237.125
## 2      1.0   686.625
## 3      2.0  2783.166
## 4      2.5  2520.816
## 5      3.0  3696.945
## 6      3.2 15937.500
## 7      3.3  2841.062
## 8      3.4 23437.500
## 9      3.5  4843.346
## 10     3.6  7769.500
## 11     3.7  6701.958
## 12     3.8  5400.062
## 13     3.9 13062.750
## 14     4.0  6393.105
## 15     4.1 19075.000
## 16     4.3  7423.125
## 17     4.4  5563.500
## 18     4.5  8699.920
## 19     4.7 10125.000
## 20     4.8 46752.812
## 21     5.0 12398.221
chisq.test(StarRating,RoomRent) #p-value < 2.2e-16 
## Warning in chisq.test(StarRating, RoomRent): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  StarRating and RoomRent
## X-squared = 132310, df = 43060, p-value < 2.2e-16
t.test(StarRating,RoomRent) #p-value < 2.2e-16
## 
##  Welch Two Sample t-test
## 
## data:  StarRating and RoomRent
## t = -85.799, df = 13223, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5597.594 -5347.543
## sample estimates:
##   mean of x   mean of y 
##    3.459211 5476.027828

RoomRent vs Distance from Airport

boxplot(log10(RoomRent)~Airport,data=hotel, main="Room Rent vs. Airport", horizontal=TRUE,xlab="log10(RoomRent)",ylab="Distance of Airport")

scatterplot(Airport,log10(RoomRent))

aggregate(RoomRent, by=list(Airport), mean)
##     Group.1         x
## 1       0.2  5247.000
## 2       0.3  2217.500
## 3       0.4  4987.025
## 4       0.5  2437.458
## 5       0.6  4805.781
## 6       0.7  2894.750
## 7       0.8  4687.500
## 8       0.9  4356.128
## 9       1.0  4452.812
## 10      1.1 43487.500
## 11      1.2  5154.050
## 12      1.4 15562.500
## 13      1.5  7273.250
## 14      1.6 10460.469
## 15      1.7  4968.091
## 16      1.8  3298.681
## 17      1.9  6440.400
## 18      2.0  5804.821
## 19      2.1  3400.975
## 20      2.2  4448.000
## 21      2.3  8030.938
## 22      2.4  5419.656
## 23      2.5  5863.000
## 24      2.6  3027.396
## 25      2.7  4659.054
## 26      2.8  4563.417
## 27      2.9  3667.232
## 28      3.0  4636.929
## 29      3.1  5859.875
## 30      3.2  7848.667
## 31      3.3  8960.125
## 32      3.4  6088.854
## 33      3.5  4267.839
## 34      3.6  6699.828
## 35      3.7  2603.125
## 36      3.8  3145.825
## 37      3.9  4075.250
## 38      4.0  4013.944
## 39      4.1  4218.688
## 40      4.2  3857.075
## 41      4.3  4388.656
## 42      4.4  5525.062
## 43      4.5  6135.208
## 44      4.6  5061.025
## 45      4.7  2378.125
## 46      4.8  3889.250
## 47      4.9  3421.219
## 48      5.0  6144.041
## 49      5.1  6547.000
## 50      5.2  4390.306
## 51      5.3  4119.250
## 52      5.4  4705.900
## 53      5.5  4371.625
## 54      5.6  5430.900
## 55      5.7  6046.594
## 56      5.8  5625.607
## 57      5.9  5016.650
## 58      6.0  3875.970
## 59      6.1  5853.375
## 60      6.2  2701.828
## 61      6.3  1705.125
## 62      6.4  3212.479
## 63      6.5  4871.208
## 64      6.6  3623.625
## 65      6.7  2994.292
## 66      6.8  4132.357
## 67      6.9  2749.950
## 68      7.0  4028.469
## 69      7.1  2366.806
## 70      7.2  3611.604
## 71      7.3  2522.042
## 72      7.4  3571.975
## 73      7.5  4816.667
## 74      7.6  4420.141
## 75      7.7 27828.708
## 76      7.8  5656.594
## 77      7.9  2730.958
## 78      8.0  9879.685
## 79      8.1  5120.639
## 80      8.2  3684.607
## 81      8.3  5117.825
## 82      8.4  4164.208
## 83      8.5  2991.688
## 84      8.6  1378.938
## 85      8.7  4457.446
## 86      8.8  3498.562
## 87      8.9  2627.250
## 88      9.0  3618.694
## 89      9.1  5122.458
## 90      9.2  9520.790
## 91      9.3  4147.375
## 92      9.4  5233.500
## 93      9.5  8224.909
## 94      9.6  3226.050
## 95      9.7  4376.083
## 96      9.8  4045.625
## 97      9.9  7947.732
## 98     10.0  5184.302
## 99     10.2  2780.000
## 100    10.3  2587.000
## 101    10.4  2346.000
## 102    10.6  1574.375
## 103    10.7  7025.000
## 104    10.8 12157.875
## 105    10.9  1949.812
## 106    11.0  4699.239
## 107    11.1  2867.250
## 108    11.3  1948.812
## 109    11.7  4069.000
## 110    11.9  7264.938
## 111    12.0  5014.164
## 112    12.2  3113.458
## 113    12.3  1746.750
## 114    12.6  4241.000
## 115    12.7  4566.750
## 116    13.0  6872.332
## 117    13.1  2525.000
## 118    13.3  4881.250
## 119    13.5  1831.250
## 120    13.6  4371.333
## 121    13.7  5908.812
## 122    13.8  2507.500
## 123    14.0  3632.243
## 124    14.2  1801.000
## 125    14.4  4002.167
## 126    14.5  3847.500
## 127    14.6  6711.438
## 128    14.7  5431.167
## 129    14.8  7086.625
## 130    14.9  4631.250
## 131    15.0  4804.245
## 132    15.3  2983.875
## 133    15.4  5179.125
## 134    15.6  4233.375
## 135    15.7  3385.250
## 136    15.8  5960.500
## 137    15.9  9961.875
## 138    16.0  5052.724
## 139    16.1 10451.000
## 140    16.2  4637.250
## 141    16.4  2404.250
## 142    16.5  4639.250
## 143    16.7  6648.281
## 144    17.0  5245.613
## 145    17.1  3251.000
## 146    17.2  4874.500
## 147    17.4  1911.750
## 148    17.5 16538.125
## 149    17.6  6273.000
## 150    17.8  4139.438
## 151    18.0  5023.542
## 152    18.3  6125.000
## 153    18.5  3543.250
## 154    18.6  6693.750
## 155    18.7  2782.625
## 156    19.0 10216.920
## 157    19.5  2262.500
## 158    19.9  7232.500
## 159    20.0  5474.096
## 160    20.2  8412.500
## 161    20.3  3930.812
## 162    20.5  2169.625
## 163    20.9  6281.750
## 164    21.0  4546.419
## 165    21.4  6944.500
## 166    21.5  3882.750
## 167    22.0  4453.590
## 168    22.1  5305.000
## 169    22.2  3235.000
## 170    22.4  3887.500
## 171    22.5  6103.250
## 172    23.0  5019.740
## 173    23.2 10887.500
## 174    23.3  5088.000
## 175    23.4  4942.375
## 176    24.0  3863.335
## 177    24.2 38115.625
## 178    24.3 16894.500
## 179    24.5  5305.750
## 180    24.6 45274.375
## 181    24.7  2078.000
## 182    24.9 20867.438
## 183    25.0  5229.457
## 184    25.6  7140.625
## 185    25.7  6137.500
## 186    25.9 15937.500
## 187    26.0  6258.703
## 188    26.1 26156.250
## 189    26.3  2369.250
## 190    26.4  7483.000
## 191    26.5  6112.500
## 192    26.7  7992.500
## 193    27.0  5835.206
## 194    27.1 23437.500
## 195    27.2  4832.000
## 196    28.0  3282.277
## 197    28.1  7140.625
## 198    28.6  7518.750
## 199    28.7  3781.625
## 200    29.0  3602.364
## 201    30.0  5784.393
## 202    30.5 20500.000
## 203    31.0  4943.406
## 204    31.2  6193.750
## 205    31.3  9125.000
## 206    31.9  4204.750
## 207    32.0  5803.528
## 208    32.9  7936.875
## 209    33.0  3026.100
## 210    33.4  6292.000
## 211    34.0  5784.875
## 212    35.0  8111.898
## 213    36.0  7528.882
## 214    36.2  6871.500
## 215    37.0  8712.878
## 216    38.0  6006.755
## 217    38.3  8117.875
## 218    39.0  4524.650
## 219    39.9  2206.500
## 220    40.0  5576.768
## 221    41.0  5355.676
## 222    42.0  3292.293
## 223    42.7  4118.750
## 224    43.0  7559.758
## 225    43.9  9247.500
## 226    44.0  5925.000
## 227    44.5  4233.125
## 228    44.6  7147.000
## 229    44.8 33033.500
## 230    46.0  4236.850
## 231    47.0  7256.000
## 232    47.5 19108.125
## 233    48.0  4268.750
## 234    48.4  3000.000
## 235    49.0 18237.500
## 236    50.0  5681.875
## 237    50.1  2360.875
## 238    50.5  3417.750
## 239    51.0  3178.250
## 240    52.0  4198.375
## 241    52.7  7820.000
## 242    53.0  4062.500
## 243    55.0 18950.000
## 244    57.2 15375.000
## 245    60.0  2846.000
## 246    61.0 14319.062
## 247    62.0  5412.719
## 248    63.0  8687.500
## 249    63.5  3900.000
## 250    63.6  2625.000
## 251    65.0  6257.888
## 252    67.6  4149.750
## 253    69.0  2682.125
## 254    73.1  3172.500
## 255    80.0  2554.000
## 256    80.3  1117.750
## 257    81.0  2554.000
## 258    82.0  6717.111
## 259    83.0  2554.000
## 260    84.0  2554.000
## 261    85.0  2554.000
## 262    86.0  2554.000
## 263    87.0  2554.000
## 264    91.3  1758.875
## 265    96.5  3821.375
## 266   100.0  6144.257
## 267   102.4  6444.750
## 268   105.0  8162.371
## 269   110.0  5976.109
## 270   117.4  6337.375
## 271   124.0  4629.648
chisq.test(Airport,RoomRent) #p-value < 2.2e-16 
## Warning in chisq.test(Airport, RoomRent): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  Airport and RoomRent
## X-squared = 1583700, df = 581310, p-value < 2.2e-16
t.test(Airport,RoomRent) #p-value < 2.2e-16
## 
##  Welch Two Sample t-test
## 
## data:  Airport and RoomRent
## t = -85.521, df = 13223, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5579.889 -5329.837
## sample estimates:
##  mean of x  mean of y 
##   21.16489 5476.02783

RoomRent vs FreeWifi

boxplot(log10(RoomRent)~FreeWifi,data=hotel, main="Room Rent vs. FreeWifi", horizontal=TRUE,xlab="log10(RoomRent)",ylab="FreeWifi")

scatterplot(FreeWifi,log10(RoomRent))

aggregate(RoomRent, by=list(FreeWifi), mean)
##   Group.1        x
## 1       0 5380.004
## 2       1 5483.722
chisq.test(FreeWifi,RoomRent) #p-value < 2.2e-16 
## Warning in chisq.test(FreeWifi, RoomRent): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  FreeWifi and RoomRent
## X-squared = 5910.4, df = 2153, p-value < 2.2e-16
t.test(RoomRent~FreeWifi) #p-value= 0.4325
## 
##  Welch Two Sample t-test
## 
## data:  RoomRent by FreeWifi
## t = -0.78503, df = 1805.8, p-value = 0.4325
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -362.8424  155.4066
## sample estimates:
## mean in group 0 mean in group 1 
##        5380.004        5483.722

RoomRent vs FreeBreakfast

boxplot(log10(RoomRent)~FreeBreakfast,data = hotel, main="Room Rent vs Freebreakfast", horizontal= TRUE,xlab="log10(RoomRent)",ylab="FreeBreakfast")

scatterplot(FreeBreakfast,log10(RoomRent))

aggregate(RoomRent, by=list(FreeBreakfast), mean)
##   Group.1        x
## 1       0 5573.790
## 2       1 5423.131
chisq.test(FreeBreakfast, RoomRent)  #p-value < 2.2e-16
## Warning in chisq.test(FreeBreakfast, RoomRent): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  FreeBreakfast and RoomRent
## X-squared = 6488.9, df = 2153, p-value < 2.2e-16
t.test(RoomRent~FreeBreakfast) #p-value= 0.3365
## 
##  Welch Two Sample t-test
## 
## data:  RoomRent by FreeBreakfast
## t = 0.96115, df = 6214.6, p-value = 0.3365
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -156.6221  457.9393
## sample estimates:
## mean in group 0 mean in group 1 
##        5573.790        5423.131

RoomRent vs HotelCapacity

boxplot(log10(RoomRent)~HotelCapacity,data = hotel, main= "Room Rent vs Hotel Capacity", horizontal= TRUE,xlab="log10(RoomRent)",ylab="HotelCapacity")

scatterplot(HotelCapacity,log10(RoomRent))

aggregate(RoomRent,by=list(HotelCapacity), mean)
##     Group.1         x
## 1         1  5613.062
## 2         2  9757.688
## 3         3  4295.188
## 4         4  3174.438
## 5         5  3216.144
## 6         6  3956.319
## 7         7  2398.719
## 8         8  3799.724
## 9         9  6148.659
## 10       10  5846.000
## 11       11  4153.733
## 12       12  4853.848
## 13       13  4817.900
## 14       14  7621.406
## 15       15  3714.794
## 16       16  6343.458
## 17       17  5198.688
## 18       18  3020.744
## 19       19  5436.503
## 20       20  3363.311
## 21       21  3169.528
## 22       22  3575.667
## 23       23  3714.547
## 24       24  4055.228
## 25       25  2966.417
## 26       26  3476.444
## 27       27  4289.575
## 28       28  4625.722
## 29       29  7759.860
## 30       30  4561.662
## 31       31  6490.971
## 32       32  4173.807
## 33       33  5002.913
## 34       34  4478.892
## 35       35  3762.562
## 36       36  5194.946
## 37       37  3272.054
## 38       38  3212.292
## 39       39  6695.972
## 40       40  4575.496
## 41       41  4880.479
## 42       42  4291.326
## 43       43  5459.178
## 44       44  3930.445
## 45       45  3882.049
## 46       46  4824.778
## 47       47  4653.042
## 48       48  4245.281
## 49       49  4665.823
## 50       50  6566.019
## 51       51  4099.300
## 52       52  4174.858
## 53       53  3646.833
## 54       54  5537.889
## 55       55  8833.153
## 56       56  3885.562
## 57       57  4895.875
## 58       58  5659.500
## 59       59 10682.646
## 60       60  6806.430
## 61       61  6117.575
## 62       62  6533.833
## 63       63  5170.375
## 64       64 15013.107
## 65       65  9644.000
## 66       66  5683.075
## 67       67  5756.163
## 68       68  6253.438
## 69       69  4759.646
## 70       70  8099.875
## 71       71 15643.000
## 72       72  7467.181
## 73       73  5314.969
## 74       74  2347.562
## 75       75  9944.344
## 76       76  3903.875
## 77       77  6125.896
## 78       78 10787.500
## 79       79 48024.583
## 80       80  8503.034
## 81       81  4579.938
## 82       82  4553.150
## 83       83 17432.562
## 84       84 12533.333
## 85       85  9676.047
## 86       86  4999.000
## 87       87 17063.625
## 88       88  8249.271
## 89       89  3734.375
## 90       90  7072.500
## 91       91  2812.125
## 92       92  5826.229
## 93       93  5771.167
## 94       94  6662.812
## 95       95  5801.938
## 96       96 10487.312
## 97       97  5798.750
## 98       98  7016.958
## 99       99  4659.125
## 100     100  5841.987
## 101     101  2845.875
## 102     102 14709.075
## 103     103  5898.500
## 104     104  6792.375
## 105     106  6668.167
## 106     107  5448.750
## 107     108  5265.333
## 108     109  3445.875
## 109     110  7293.042
## 110     111  4630.625
## 111     112  6043.484
## 112     113  4179.000
## 113     114  5743.417
## 114     115  4081.375
## 115     117  6775.312
## 116     119  5657.958
## 117     120  6593.750
## 118     121  3015.500
## 119     122  5936.562
## 120     124  5790.000
## 121     126  8693.812
## 122     127  4864.250
## 123     128  5245.500
## 124     129  5742.325
## 125     130  7160.677
## 126     132  7014.857
## 127     133  4765.625
## 128     134  6598.750
## 129     135  6650.062
## 130     136  6250.000
## 131     137  8918.938
## 132     138  5580.000
## 133     139  4432.125
## 134     140  7414.000
## 135     141  8150.938
## 136     142  5625.000
## 137     144  6138.438
## 138     145 12424.375
## 139     147  5384.500
## 140     148  4499.000
## 141     149  6031.333
## 142     150  5850.792
## 143     151  4931.083
## 144     153  6442.933
## 145     154 10755.417
## 146     155  4459.688
## 147     159  2384.250
## 148     160  9771.667
## 149     162  4500.000
## 150     164  8049.833
## 151     165  3993.625
## 152     166  5681.250
## 153     167  6647.500
## 154     170 11875.000
## 155     171  4403.375
## 156     172  6347.375
## 157     173  4448.688
## 158     176  5950.000
## 159     177  7280.250
## 160     178  5398.375
## 161     179 11840.938
## 162     180  4574.000
## 163     181  4984.458
## 164     182  5875.000
## 165     183 18552.250
## 166     184  6393.125
## 167     187  5262.500
## 168     189  7275.000
## 169     190  3621.571
## 170     191  4931.625
## 171     195  6783.750
## 172     196  3659.000
## 173     197  6693.750
## 174     198  5437.625
## 175     199  7018.750
## 176     200  8482.792
## 177     201  8281.250
## 178     202 13359.375
## 179     203  4900.000
## 180     204  7338.250
## 181     205  2045.875
## 182     207 10330.875
## 183     208  7666.417
## 184     209  4900.000
## 185     210  5625.000
## 186     211  7140.042
## 187     212 10375.000
## 188     214  4626.333
## 189     215  8186.750
## 190     216  7320.000
## 191     218  6683.250
## 192     220  6828.250
## 193     222  6187.500
## 194     223  5432.500
## 195     228  4837.500
## 196     229 10887.500
## 197     230  4886.500
## 198     231  9127.750
## 199     232  3300.000
## 200     233  9968.750
## 201     234  5656.875
## 202     235 14850.000
## 203     236  5875.000
## 204     237  6375.000
## 205     240 15937.500
## 206     244  6526.292
## 207     247  6416.625
## 208     248  6493.750
## 209     251 10125.000
## 210     254 17025.625
## 211     255 18237.500
## 212     260  8823.750
## 213     261  8406.250
## 214     264  7397.938
## 215     267  4776.000
## 216     269  7065.250
## 217     270  8042.875
## 218     273  9375.000
## 219     279  9874.375
## 220     281 11187.500
## 221     286  6709.500
## 222     287 10864.188
## 223     292 10184.375
## 224     293  6523.875
## 225     302 11187.500
## 226     310  6052.125
## 227     311  4249.000
## 228     317  8716.667
## 229     323  9750.000
## 230     324 10812.500
## 231     326  8887.500
## 232     327  6508.333
## 233     334  7068.125
## 234     340  6125.000
## 235     341  9280.375
## 236     385  6861.500
## 237     390  9031.125
## 238     393  4581.875
## 239     400  6298.750
## 240     403 12157.875
## 241     411  8775.000
## 242     414  7165.250
## 243     419  5500.000
## 244     436  8993.750
## 245     451  6300.000
## 246     461  7429.125
## 247     480  8061.500
## 248     493 13348.250
## 249     507  8562.500
## 250     523  9543.750
## 251     550  6741.500
## 252     560 14317.188
## 253     600  6928.750
chisq.test(HotelCapacity, RoomRent)  #p-value < 2.2e-16
## Warning in chisq.test(HotelCapacity, RoomRent): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  HotelCapacity and RoomRent
## X-squared = 1468500, df = 542560, p-value < 2.2e-16
t.test(RoomRent, HotelCapacity)  #p-value < 2.2e-16
## 
##  Welch Two Sample t-test
## 
## data:  RoomRent and HotelCapacity
## t = 84.868, df = 13226, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  5288.446 5538.511
## sample estimates:
##  mean of x  mean of y 
## 5476.02783   62.54946

RoomRent vs HasSwimmingPool

boxplot(log10(RoomRent)~HasSwimmingPool,data = hotel, main= "Room Rent vs HasSwimmingPool", horizontal= TRUE,xlab="log10(RoomRent)",ylab="HasSwimmimgPool")

scatterplot(HasSwimmingPool,log10(RoomRent))

aggregate(RoomRent,by=list(HasSwimmingPool), mean)
##   Group.1        x
## 1       0 3777.132
## 2       1 8549.052
chisq.test(HasSwimmingPool, RoomRent)  #p-value < 2.2e-16
## Warning in chisq.test(HasSwimmingPool, RoomRent): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  HasSwimmingPool and RoomRent
## X-squared = 8387.9, df = 2153, p-value < 2.2e-16
t.test(RoomRent, HasSwimmingPool) #p-value= 0.3365
## 
##  Welch Two Sample t-test
## 
## data:  RoomRent and HasSwimmingPool
## t = 85.847, df = 13223, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  5350.646 5600.697
## sample estimates:
##    mean of x    mean of y 
## 5476.0278282    0.3560194

Checking whether Star-Rating is a reliable factor to decide facilities provided by a hotel

StarRating vs FreeWifi

boxplot(StarRating~FreeWifi, data=hotel, main="StarRating vs Freewifi",horizontal=TRUE,xlab="StarRating",ylab="Freewifi")

scatterplot(FreeWifi,StarRating)

aggregate(StarRating, by=list(FreeWifi),mean)
##   Group.1        x
## 1       0 3.410805
## 2       1 3.463089
chisq.test(StarRating,FreeWifi) #p-value=2.2e-16
## Warning in chisq.test(StarRating, FreeWifi): Chi-squared approximation may
## be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  StarRating and FreeWifi
## X-squared = 217.34, df = 20, p-value < 2.2e-16

StarRating vs FreeBreakFast

boxplot(StarRating~FreeBreakfast, data=hotel, main="StarRating vs FreeBreakfast",horizontal=TRUE,xlab="StarRating",ylab="FreeBreakfast")

scatterplot(StarRating,FreeBreakfast)

aggregate(StarRating, by=list(FreeBreakfast),mean)
##   Group.1        x
## 1       0 3.492763
## 2       1 3.441056
chisq.test(StarRating,FreeBreakfast) #p-value=2.2e-16
## Warning in chisq.test(StarRating, FreeBreakfast): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  StarRating and FreeBreakfast
## X-squared = 435.38, df = 20, p-value < 2.2e-16

StarRating vs HotelCapacity

boxplot(StarRating~HotelCapacity, data=hotel, main="StarRating vs HotelCapacity",horizontal=TRUE,xlab="StarRating",ylab="HotelCapacity")

scatterplot(HotelCapacity,StarRating)

aggregate(StarRating, by=list(HotelCapacity),mean)
##     Group.1        x
## 1         1 3.750000
## 2         2 4.000000
## 3         3 2.500000
## 4         4 2.875000
## 5         5 2.923077
## 6         6 2.722222
## 7         7 2.625000
## 8         8 2.868421
## 9         9 3.090909
## 10       10 2.986564
## 11       11 3.133333
## 12       12 2.928571
## 13       13 2.900000
## 14       14 2.800000
## 15       15 2.972503
## 16       16 3.000000
## 17       17 3.000000
## 18       18 2.904762
## 19       19 3.079882
## 20       20 3.094595
## 21       21 3.081818
## 22       22 3.083333
## 23       23 2.968750
## 24       24 3.148789
## 25       25 3.050000
## 26       26 3.027778
## 27       27 3.275000
## 28       28 3.136364
## 29       29 3.511628
## 30       30 3.332432
## 31       31 3.470588
## 32       32 3.242424
## 33       33 3.230769
## 34       34 3.233333
## 35       35 3.000000
## 36       36 3.223810
## 37       37 3.285714
## 38       38 3.200000
## 39       39 3.416667
## 40       40 3.468750
## 41       41 3.416667
## 42       42 3.333333
## 43       43 3.726027
## 44       44 3.375000
## 45       45 3.222222
## 46       46 3.444444
## 47       47 3.666667
## 48       48 3.475000
## 49       49 3.166667
## 50       50 3.653846
## 51       51 3.700000
## 52       52 3.500000
## 53       53 3.333333
## 54       54 3.466667
## 55       55 3.888889
## 56       56 3.750000
## 57       57 3.600000
## 58       58 3.800000
## 59       59 4.000000
## 60       60 3.406250
## 61       61 3.900000
## 62       62 3.833333
## 63       63 4.000000
## 64       64 3.842857
## 65       65 3.672727
## 66       66 3.600000
## 67       67 3.550000
## 68       68 3.250000
## 69       69 3.666667
## 70       70 3.555556
## 71       71 3.266667
## 72       72 3.611111
## 73       73 3.375000
## 74       74 3.250000
## 75       75 4.500000
## 76       76 3.800000
## 77       77 3.833333
## 78       78 4.300000
## 79       79 3.833333
## 80       80 3.754545
## 81       81 3.875000
## 82       82 3.600000
## 83       83 3.950000
## 84       84 4.333333
## 85       85 4.312500
## 86       86 4.000000
## 87       87 4.250000
## 88       88 3.916667
## 89       89 4.000000
## 90       90 4.000000
## 91       91 3.000000
## 92       92 3.666667
## 93       93 4.000000
## 94       94 3.875000
## 95       95 4.000000
## 96       96 4.000000
## 97       97 4.500000
## 98       98 4.083333
## 99       99 4.125000
## 100     100 3.697436
## 101     101 3.500000
## 102     102 4.300000
## 103     103 3.500000
## 104     104 4.250000
## 105     106 4.000000
## 106     107 3.666667
## 107     108 4.250000
## 108     109 3.750000
## 109     110 4.166667
## 110     111 4.000000
## 111     112 4.125000
## 112     113 4.000000
## 113     114 4.000000
## 114     115 4.000000
## 115     117 4.000000
## 116     119 4.333333
## 117     120 4.000000
## 118     121 3.900000
## 119     122 4.000000
## 120     124 5.000000
## 121     126 4.500000
## 122     127 4.500000
## 123     128 4.000000
## 124     129 4.300000
## 125     130 4.112903
## 126     132 4.214286
## 127     133 4.000000
## 128     134 4.000000
## 129     135 4.000000
## 130     136 5.000000
## 131     137 4.625000
## 132     138 4.000000
## 133     139 4.500000
## 134     140 4.300000
## 135     141 4.075000
## 136     142 4.333333
## 137     144 3.750000
## 138     145 4.666667
## 139     147 4.000000
## 140     148 4.000000
## 141     149 3.833333
## 142     150 3.716667
## 143     151 3.433333
## 144     153 4.000000
## 145     154 4.666667
## 146     155 3.950000
## 147     159 4.000000
## 148     160 4.333333
## 149     162 4.000000
## 150     164 4.666667
## 151     165 3.500000
## 152     166 4.000000
## 153     167 4.500000
## 154     170 4.500000
## 155     171 4.500000
## 156     172 4.166667
## 157     173 3.750000
## 158     176 4.000000
## 159     177 3.750000
## 160     178 4.625000
## 161     179 5.000000
## 162     180 3.500000
## 163     181 4.333333
## 164     182 5.000000
## 165     183 4.000000
## 166     184 4.500000
## 167     187 5.000000
## 168     189 5.000000
## 169     190 4.000000
## 170     191 5.000000
## 171     195 5.000000
## 172     196 3.000000
## 173     197 4.000000
## 174     198 4.000000
## 175     199 5.000000
## 176     200 4.500000
## 177     201 5.000000
## 178     202 5.000000
## 179     203 5.000000
## 180     204 4.500000
## 181     205 3.000000
## 182     207 4.000000
## 183     208 5.000000
## 184     209 4.500000
## 185     210 5.000000
## 186     211 4.666667
## 187     212 5.000000
## 188     214 3.833333
## 189     215 5.000000
## 190     216 5.000000
## 191     218 5.000000
## 192     220 4.333333
## 193     222 5.000000
## 194     223 4.500000
## 195     228 5.000000
## 196     229 5.000000
## 197     230 4.000000
## 198     231 5.000000
## 199     232 4.000000
## 200     233 5.000000
## 201     234 5.000000
## 202     235 5.000000
## 203     236 4.000000
## 204     237 5.000000
## 205     240 3.200000
## 206     244 4.500000
## 207     247 5.000000
## 208     248 4.500000
## 209     251 4.700000
## 210     254 5.000000
## 211     255 5.000000
## 212     260 4.300000
## 213     261 5.000000
## 214     264 5.000000
## 215     267 4.000000
## 216     269 5.000000
## 217     270 5.000000
## 218     273 5.000000
## 219     279 5.000000
## 220     281 5.000000
## 221     286 5.000000
## 222     287 5.000000
## 223     292 5.000000
## 224     293 5.000000
## 225     302 5.000000
## 226     310 5.000000
## 227     311 4.000000
## 228     317 5.000000
## 229     323 5.000000
## 230     324 5.000000
## 231     326 5.000000
## 232     327 5.000000
## 233     334 4.000000
## 234     340 5.000000
## 235     341 5.000000
## 236     385 5.000000
## 237     390 5.000000
## 238     393 5.000000
## 239     400 5.000000
## 240     403 5.000000
## 241     411 5.000000
## 242     414 5.000000
## 243     419 4.000000
## 244     436 5.000000
## 245     451 5.000000
## 246     461 5.000000
## 247     480 5.000000
## 248     493 5.000000
## 249     507 5.000000
## 250     523 5.000000
## 251     550 5.000000
## 252     560 5.000000
## 253     600 5.000000
chisq.test(StarRating,HotelCapacity) #p-value<2.2e-16
## Warning in chisq.test(StarRating, HotelCapacity): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  StarRating and HotelCapacity
## X-squared = 77127, df = 5040, p-value < 2.2e-16

StarRating vs HasSwimmingPool

boxplot(StarRating~HasSwimmingPool, data=hotel, main="StarRating vs HasSwimmingPool",horizontal=TRUE,xlab="StarRating",ylab="HasSwimmimgPool")

scatterplot(HasSwimmingPool,StarRating)

aggregate(StarRating, by=list(HasSwimmingPool),mean)
##   Group.1        x
## 1       0 3.111602
## 2       1 4.087978
chisq.test(StarRating,HasSwimmingPool) #p-value=2.2e-16
## Warning in chisq.test(StarRating, HasSwimmingPool): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  StarRating and HasSwimmingPool
## X-squared = 5804.9, df = 20, p-value < 2.2e-16

Scatterplot Matrix

scatterplotMatrix(~RoomRent+CityRank+Population+IsMetroCity+IsTouristDestination+IsWeekend+IsNewYearEve,data=hotel, main="Scatterplot for Various Variables")
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

scatterplotMatrix(~RoomRent+StarRating+Airport+FreeWifi+FreeBreakfast+HotelCapacity+HasSwimmingPool, data = hotel,main="Scatterplot for Various Variables")
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

Overall Visualisation

library(corrgram)
corrgram(hotel, order=TRUE, lower.panel = panel.shade, upper.panel =  panel.pie, text.panel = panel.txt, main= "Hotel Rent Analysis Correlogram" )

a<-cor(hotel[,c(3:8,11:13,17:21)])
round(a,2)
##                      Population CityRank IsMetroCity IsTouristDestination
## Population                 1.00    -0.84        0.77                -0.05
## CityRank                  -0.84     1.00       -0.56                 0.28
## IsMetroCity                0.77    -0.56        1.00                 0.18
## IsTouristDestination      -0.05     0.28        0.18                 1.00
## IsWeekend                  0.01    -0.01        0.00                -0.02
## IsNewYearEve               0.00     0.00        0.00                 0.00
## RoomRent                  -0.09     0.09       -0.07                 0.12
## StarRating                 0.13    -0.13        0.08                -0.04
## Airport                   -0.26     0.51       -0.21                 0.19
## FreeWifi                   0.11    -0.12        0.09                -0.06
## FreeBreakfast              0.04    -0.01        0.05                -0.07
## HotelCapacity              0.26    -0.26        0.19                -0.10
## HasSwimmingPool            0.03    -0.10        0.02                 0.04
## RentCategory              -0.08     0.15       -0.04                 0.17
##                      IsWeekend IsNewYearEve RoomRent StarRating Airport
## Population                0.01         0.00    -0.09       0.13   -0.26
## CityRank                 -0.01         0.00     0.09      -0.13    0.51
## IsMetroCity               0.00         0.00    -0.07       0.08   -0.21
## IsTouristDestination     -0.02         0.00     0.12      -0.04    0.19
## IsWeekend                 1.00         0.29     0.00       0.01    0.00
## IsNewYearEve              0.29         1.00     0.04       0.00    0.00
## RoomRent                  0.00         0.04     1.00       0.37    0.05
## StarRating                0.01         0.00     0.37       1.00   -0.06
## Airport                   0.00         0.00     0.05      -0.06    1.00
## FreeWifi                  0.00         0.00     0.00       0.02   -0.09
## FreeBreakfast            -0.01         0.00    -0.01      -0.03    0.02
## HotelCapacity             0.01         0.00     0.16       0.64   -0.12
## HasSwimmingPool           0.00         0.00     0.31       0.62   -0.14
## RentCategory              0.01         0.04     0.60       0.55    0.15
##                      FreeWifi FreeBreakfast HotelCapacity HasSwimmingPool
## Population               0.11          0.04          0.26            0.03
## CityRank                -0.12         -0.01         -0.26           -0.10
## IsMetroCity              0.09          0.05          0.19            0.02
## IsTouristDestination    -0.06         -0.07         -0.10            0.04
## IsWeekend                0.00         -0.01          0.01            0.00
## IsNewYearEve             0.00          0.00          0.00            0.00
## RoomRent                 0.00         -0.01          0.16            0.31
## StarRating               0.02         -0.03          0.64            0.62
## Airport                 -0.09          0.02         -0.12           -0.14
## FreeWifi                 1.00          0.16         -0.01           -0.02
## FreeBreakfast            0.16          1.00         -0.09           -0.06
## HotelCapacity           -0.01         -0.09          1.00            0.51
## HasSwimmingPool         -0.02         -0.06          0.51            1.00
## RentCategory            -0.04          0.03          0.36            0.47
##                      RentCategory
## Population                  -0.08
## CityRank                     0.15
## IsMetroCity                 -0.04
## IsTouristDestination         0.17
## IsWeekend                    0.01
## IsNewYearEve                 0.04
## RoomRent                     0.60
## StarRating                   0.55
## Airport                      0.15
## FreeWifi                    -0.04
## FreeBreakfast                0.03
## HotelCapacity                0.36
## HasSwimmingPool              0.47
## RentCategory                 1.00

Linear Regration Models

colnames(hotel)
##  [1] "X"                    "CityName"             "Population"          
##  [4] "CityRank"             "IsMetroCity"          "IsTouristDestination"
##  [7] "IsWeekend"            "IsNewYearEve"         "Date"                
## [10] "HotelName"            "RoomRent"             "StarRating"          
## [13] "Airport"              "HotelAddress"         "HotelPincode"        
## [16] "HotelDescription"     "FreeWifi"             "FreeBreakfast"       
## [19] "HotelCapacity"        "HasSwimmingPool"      "RentCategory"
M1<- lm(RoomRent~Population+CityRank+IsMetroCity+IsTouristDestination+IsWeekend+IsNewYearEve+StarRating+Airport+FreeBreakfast+FreeWifi+HotelCapacity+HasSwimmingPool)
summary(M1)
## 
## Call:
## lm(formula = RoomRent ~ Population + CityRank + IsMetroCity + 
##     IsTouristDestination + IsWeekend + IsNewYearEve + StarRating + 
##     Airport + FreeBreakfast + FreeWifi + HotelCapacity + HasSwimmingPool)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -11847  -2358   -691   1030 309691 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -8.605e+03  4.495e+02 -19.143  < 2e-16 ***
## Population           -1.186e-04  3.593e-05  -3.300 0.000969 ***
## CityRank              1.983e+00  1.036e+01   0.191 0.848170    
## IsMetroCity          -6.633e+02  2.165e+02  -3.064 0.002189 ** 
## IsTouristDestination  1.922e+03  1.483e+02  12.962  < 2e-16 ***
## IsWeekend            -8.992e+01  1.239e+02  -0.726 0.468058    
## IsNewYearEve          8.827e+02  1.819e+02   4.852 1.24e-06 ***
## StarRating            3.592e+03  1.108e+02  32.424  < 2e-16 ***
## Airport               9.473e+00  3.173e+00   2.985 0.002839 ** 
## FreeBreakfast         1.693e+02  1.234e+02   1.372 0.169948    
## FreeWifi              5.502e+02  2.243e+02   2.453 0.014183 *  
## HotelCapacity        -1.028e+01  1.034e+00  -9.945  < 2e-16 ***
## HasSwimmingPool       2.153e+03  1.616e+02  13.322  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6602 on 13211 degrees of freedom
## Multiple R-squared:  0.1905, Adjusted R-squared:  0.1897 
## F-statistic:   259 on 12 and 13211 DF,  p-value: < 2.2e-16
M2 <- update(M1, .~. - CityRank-IsWeekend-FreeBreakfast-FreeWifi)
summary(M2)
## 
## Call:
## lm(formula = RoomRent ~ Population + IsMetroCity + IsTouristDestination + 
##     IsNewYearEve + StarRating + Airport + HotelCapacity + HasSwimmingPool)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -11798  -2359   -704   1034 309572 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -8.036e+03  3.563e+02 -22.556  < 2e-16 ***
## Population           -1.217e-04  2.262e-05  -5.378 7.66e-08 ***
## IsMetroCity          -6.261e+02  2.133e+02  -2.936 0.003333 ** 
## IsTouristDestination  1.898e+03  1.374e+02  13.814  < 2e-16 ***
## IsNewYearEve          8.433e+02  1.740e+02   4.846 1.27e-06 ***
## StarRating            3.613e+03  1.104e+02  32.733  < 2e-16 ***
## Airport               9.533e+00  2.712e+00   3.515 0.000441 ***
## HotelCapacity        -1.055e+01  1.028e+00 -10.269  < 2e-16 ***
## HasSwimmingPool       2.133e+03  1.598e+02  13.346  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6604 on 13215 degrees of freedom
## Multiple R-squared:  0.1899, Adjusted R-squared:  0.1894 
## F-statistic: 387.2 on 8 and 13215 DF,  p-value: < 2.2e-16
library(leaps)
M2 <- regsubsets(RoomRent ~ Population + IsTouristDestination + IsWeekend + IsNewYearEve + StarRating + Airport + HotelCapacity + FreeWifi + FreeBreakfast,data=hotel, nbest=10)
plot(M2, scale="adjr2")

M3<- lm(RoomRent~Population+CityRank+IsMetroCity+IsTouristDestination+IsNewYearEve+StarRating+Airport+FreeBreakfast+FreeWifi+HotelCapacity+HasSwimmingPool)
summary(M3)
## 
## Call:
## lm(formula = RoomRent ~ Population + CityRank + IsMetroCity + 
##     IsTouristDestination + IsNewYearEve + StarRating + Airport + 
##     FreeBreakfast + FreeWifi + HotelCapacity + HasSwimmingPool)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -11847  -2362   -695   1024 309652 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -8.654e+03  4.444e+02 -19.476  < 2e-16 ***
## Population           -1.191e-04  3.593e-05  -3.315 0.000918 ***
## CityRank              1.861e+00  1.036e+01   0.180 0.857399    
## IsMetroCity          -6.622e+02  2.165e+02  -3.059 0.002228 ** 
## IsTouristDestination  1.925e+03  1.483e+02  12.981  < 2e-16 ***
## IsNewYearEve          8.440e+02  1.740e+02   4.852 1.24e-06 ***
## StarRating            3.592e+03  1.108e+02  32.425  < 2e-16 ***
## Airport               9.482e+00  3.173e+00   2.988 0.002810 ** 
## FreeBreakfast         1.702e+02  1.234e+02   1.380 0.167660    
## FreeWifi              5.497e+02  2.243e+02   2.451 0.014253 *  
## HotelCapacity        -1.028e+01  1.034e+00  -9.943  < 2e-16 ***
## HasSwimmingPool       2.152e+03  1.616e+02  13.317  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6602 on 13212 degrees of freedom
## Multiple R-squared:  0.1904, Adjusted R-squared:  0.1898 
## F-statistic: 282.5 on 11 and 13212 DF,  p-value: < 2.2e-16