email: recs.cse1638@gmail.com College: Rajkiya Engg. College Sonbhadra

Hotel Room Pricing In The Indian Market

The dimensions of the data set are 13232 rows and 19 columns

# Row count followed by column count of the data set.
c(nrow(hotel.df), ncol(hotel.df))
## [1] 13232    19

Descriptive statistics (min, max, median, etc.) of each variable.

library(psych)
# Per-column summary statistics from psych::describe().
psych::describe(x = hotel.df)
##                      vars     n       mean         sd  median    trimmed
## CityName*               1 13232      18.07      11.72      16      17.29
## Population              2 13232 4416836.87 4258386.00 3046163 4040816.22
## CityRank                3 13232      14.83      13.51       9      13.30
## IsMetroCity             4 13232       0.28       0.45       0       0.23
## IsTouristDestination    5 13232       0.70       0.46       1       0.75
## IsWeekend               6 13232       0.62       0.48       1       0.65
## IsNewYearEve            7 13232       0.12       0.33       0       0.03
## Date*                   8 13232      14.30       2.69      14      14.39
## HotelName*              9 13232     841.19     488.16     827     841.18
## RoomRent               10 13232    5473.99    7333.12    4000    4383.33
## StarRating             11 13232       3.46       0.76       3       3.40
## Airport                12 13232      21.16      22.76      15      16.39
## HotelAddress*          13 13232    1202.53     582.17    1261    1233.25
## HotelPincode           14 13232  397430.26  259837.50  395003  388540.47
## HotelDescription*      15 13224     581.34     363.26     567     575.37
## FreeWifi               16 13232       0.93       0.26       1       1.00
## FreeBreakfast          17 13232       0.65       0.48       1       0.69
## HotelCapacity          18 13232      62.51      76.66      34      46.03
## HasSwimmingPool        19 13232       0.36       0.48       0       0.32
##                             mad      min      max      range  skew
## CityName*                 11.86      1.0       42       41.0  0.48
## Population           3846498.95   8096.0 12442373 12434277.0  0.68
## CityRank                  11.86      0.0       44       44.0  0.69
## IsMetroCity                0.00      0.0        1        1.0  0.96
## IsTouristDestination       0.00      0.0        1        1.0 -0.86
## IsWeekend                  0.00      0.0        1        1.0 -0.51
## IsNewYearEve               0.00      0.0        1        1.0  2.28
## Date*                      2.97      1.0       20       19.0 -0.77
## HotelName*               641.97      1.0     1670     1669.0  0.01
## RoomRent                2653.85    299.0   322500   322201.0 16.75
## StarRating                 0.74      0.0        5        5.0  0.48
## Airport                   11.12      0.2      124      123.8  2.73
## HotelAddress*            668.65      1.0     2108     2107.0 -0.37
## HotelPincode          257975.37 100025.0  7000157  6900132.0  9.99
## HotelDescription*        472.95      1.0     1226     1225.0  0.11
## FreeWifi                   0.00      0.0        1        1.0 -3.25
## FreeBreakfast              0.00      0.0        1        1.0 -0.62
## HotelCapacity             28.17      0.0      600      600.0  2.95
## HasSwimmingPool            0.00      0.0        1        1.0  0.60
##                      kurtosis       se
## CityName*               -0.88     0.10
## Population              -1.08 37019.65
## CityRank                -0.76     0.12
## IsMetroCity             -1.08     0.00
## IsTouristDestination    -1.26     0.00
## IsWeekend               -1.74     0.00
## IsNewYearEve             3.18     0.00
## Date*                    1.92     0.02
## HotelName*              -1.25     4.24
## RoomRent               582.06    63.75
## StarRating               0.25     0.01
## Airport                  7.89     0.20
## HotelAddress*           -0.88     5.06
## HotelPincode           249.76  2258.86
## HotelDescription*       -1.25     3.16
## FreeWifi                 8.57     0.00
## FreeBreakfast           -1.61     0.00
## HotelCapacity           11.39     0.67
## HasSwimmingPool         -1.64     0.00

One-way contingency tables for the categorical variables in the dataset.

# Number of rows recorded for each city.
table(hotel.df[["CityName"]])
## 
##             Agra        Ahmedabad         Amritsar        Bangalore 
##              432              424              136              656 
##      Bhubaneswar       Chandigarh          Chennai       Darjeeling 
##              120              336              416              136 
##            Delhi          Gangtok              Goa         Guwahati 
##             2048              128              624               48 
##         Haridwar        Hyderabad           Indore           Jaipur 
##               48              536              160              768 
##        Jaisalmer          Jodhpur           Kanpur            Kochi 
##              264              224               16              608 
##          Kolkata          Lucknow          Madurai           Manali 
##              512              128              112              288 
##        Mangalore           Mumbai           Munnar           Mysore 
##              104              712              328              160 
##         Nainital             Ooty        Panchkula             Pune 
##              144              136               64              600 
##             Puri           Rajkot        Rishikesh           Shimla 
##               56              128               88              280 
##         Srinagar            Surat Thiruvanthipuram         Thrissur 
##               40               80              392               32 
##          Udaipur         Varanasi 
##              456              264
# attach() puts the columns of hotel.df on the search path so that the calls
# that follow can use bare column names (Date, StarRating, RoomRent, ...).
attach(hotel.df)
# Frequency of each distinct value in the Date column.
# NOTE(review): the printed levels appear to spell the same day several ways
# (e.g. 04-Jan-17, Jan 04 2017, Jan 4 2017); normalise Date before relying on
# these counts.
table(Date)
## Date
##   04-Jan-16   04-Jan-17   08-Jan-16   08-Jan-17   18-Dec-16   21-Dec-16 
##          31          13          31          13          44          44 
##   24-Dec-16   25-Dec-16   28-Dec-16   31-Dec-16 Dec 18 2016 Dec 21 2016 
##          44          44          44          44        1608        1611 
## Dec 24 2016 Dec 25 2016 Dec 28 2016 Dec 31 2016 Jan 04 2017 Jan 08 2017 
##        1611        1611        1611        1611        1548        1542 
##  Jan 4 2017  Jan 8 2017 
##          60          67
# Frequency of each star rating value.
table(StarRating = hotel.df$StarRating)
## StarRating
##    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9    4  4.1 
##   16    8  440  632 5953    8   16    8 1752    8   24   16   32 2463   24 
##  4.3  4.4  4.5  4.7  4.8    5 
##   16    8  376    8   16 1408
# Counts of rows without (0) and with (1) free breakfast.
table(FreeBreakfast = hotel.df$FreeBreakfast)
## FreeBreakfast
##    0    1 
## 4643 8589
# Counts of rows without (0) and with (1) free wifi.
table(FreeWifi = hotel.df$FreeWifi)
## FreeWifi
##     0     1 
##   981 12251
# Counts of rows without (0) and with (1) a swimming pool.
table(HasSwimmingPool = hotel.df$HasSwimmingPool)
## HasSwimmingPool
##    0    1 
## 8524 4708
# Counts of rows in non-metro (0) and metro (1) cities.
table(IsMetroCity = hotel.df$IsMetroCity)
## IsMetroCity
##    0    1 
## 9472 3760

Two-way contingency tables

# Cross-tabulate swimming-pool availability against star rating.
xtabs(~ HasSwimmingPool + StarRating, data = hotel.df)
##                StarRating
## HasSwimmingPool    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7
##               0    8    8  392  616 5236    0   16    0 1272    0    0
##               1    8    0   48   16  717    8    0    8  480    8   24
##                StarRating
## HasSwimmingPool  3.8  3.9    4  4.1  4.3  4.4  4.5  4.7  4.8    5
##               0    8    8  848    8    0    8   48    0    0   48
##               1    8   24 1615   16   16    0  328    8   16 1360
# Cross-tabulate free-wifi availability against star rating.
xtabs(~ FreeWifi + StarRating, data = hotel.df)
##         StarRating
## FreeWifi    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9
##        0    0    0   80  104  336    0    0    0   96    0    0    0    0
##        1   16    8  360  528 5617    8   16    8 1656    8   24   16   32
##         StarRating
## FreeWifi    4  4.1  4.3  4.4  4.5  4.7  4.8    5
##        0  231    0    0    0   24    0    0  110
##        1 2232   24   16    8  352    8   16 1298
# Cross-tabulate free-breakfast availability against star rating.
xtabs(~ FreeBreakfast + StarRating, data = hotel.df)
##              StarRating
## FreeBreakfast    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8
##             0   16    0  216  296 1789    0    8    0  661    8    0    8
##             1    0    8  224  336 4164    8    8    8 1091    0   24    8
##              StarRating
## FreeBreakfast  3.9    4  4.1  4.3  4.4  4.5  4.7  4.8    5
##             0   16  783    0   16    0  224    8    0  594
##             1   16 1680   24    0    8  152    0   16  814
# Row percentages: share of rows with/without free wifi within each
# IsMetroCity group.
100 * prop.table(xtabs(~ IsMetroCity + FreeWifi, data = hotel.df), margin = 1)
##            FreeWifi
## IsMetroCity         0         1
##           0  8.847128 91.152872
##           1  3.803191 96.196809

Boxplot

# Horizontal box plot of room rent.
boxplot(hotel.df$RoomRent, col = "green", horizontal = TRUE)

Distance from the airport

# Horizontal box plot of the Airport column.
boxplot(hotel.df$Airport, col = "red", horizontal = TRUE)

Hotel Capacity

# Horizontal box plot of hotel capacity.
boxplot(hotel.df$HotelCapacity, col = "blue", horizontal = TRUE)

Histograms

# Histogram of the 0/1 metro-city indicator; with() keeps the bare column
# name so the automatic title is unchanged.
with(hotel.df, hist(IsMetroCity, ylim = c(0, 10000)))

Room rent

# Histogram of room rent with 100 requested breaks, x-axis limited to
# 0-100000.
with(
  hotel.df,
  hist(RoomRent, breaks = 100, xlim = c(0, 100000),
       xlab = "Rent", ylab = "no of rooms")
)

Room rent vs airport

# Room rent is the quantity being explained, so it belongs on the y-axis
# ("room rent vs airport"); the original formula had the axes reversed.
plot(RoomRent ~ Airport, data = hotel.df,
     xlab = "Distance from airport", ylab = "Room rent")

Room rent vs hotel capacity

# Room rent is the quantity being explained, so it belongs on the y-axis
# ("room rent vs hotel capacity"); the original formula had the axes reversed.
plot(RoomRent ~ HotelCapacity, data = hotel.df,
     xlab = "Hotel capacity", ylab = "Room rent")

Room rent vs star rating

# Room rent is the quantity being explained, so it belongs on the y-axis
# ("room rent vs star rating"); the original formula had the axes reversed.
plot(RoomRent ~ StarRating, data = hotel.df,
     xlab = "Star rating", ylab = "Room rent")

Correlation Matrix

# Pairwise correlations (cor() default method) of the numeric columns,
# selected by name in their original column order.
cor(hotel.df[, c(
  "Population", "CityRank", "IsMetroCity", "IsTouristDestination",
  "IsWeekend", "IsNewYearEve", "RoomRent", "StarRating", "Airport",
  "FreeWifi", "FreeBreakfast", "HotelCapacity", "HasSwimmingPool"
)])
##                         Population      CityRank   IsMetroCity
## Population            1.0000000000 -0.8353204432  0.7712260105
## CityRank             -0.8353204432  1.0000000000 -0.5643937903
## IsMetroCity           0.7712260105 -0.5643937903  1.0000000000
## IsTouristDestination -0.0482029722  0.2807134520  0.1763717063
## IsWeekend             0.0115926802 -0.0072564766  0.0018118005
## IsNewYearEve          0.0007332482 -0.0006326444  0.0006464753
## RoomRent             -0.0887280632  0.0939855292 -0.0668397705
## StarRating            0.1341365933 -0.1333810133  0.0776028661
## Airport              -0.2597010198  0.5059119892 -0.2073586125
## FreeWifi              0.1129334410 -0.1214309404  0.0868288677
## FreeBreakfast         0.0364278235 -0.0086837497  0.0513856623
## HotelCapacity         0.2599830516 -0.2561197059  0.1871502153
## HasSwimmingPool       0.0262590820 -0.1029737518  0.0214119243
##                      IsTouristDestination    IsWeekend  IsNewYearEve
## Population                   -0.048202972  0.011592680  7.332482e-04
## CityRank                      0.280713452 -0.007256477 -6.326444e-04
## IsMetroCity                   0.176371706  0.001811801  6.464753e-04
## IsTouristDestination          1.000000000 -0.019481101 -2.266388e-03
## IsWeekend                    -0.019481101  1.000000000  2.923821e-01
## IsNewYearEve                 -0.002266388  0.292382051  1.000000e+00
## RoomRent                      0.122502963  0.004580134  3.849123e-02
## StarRating                   -0.040554998  0.006378436  2.360897e-03
## Airport                       0.194422049 -0.002724756  4.598872e-04
## FreeWifi                     -0.061568821  0.002960828  2.787472e-05
## FreeBreakfast                -0.071692559 -0.007612777 -2.606416e-03
## HotelCapacity                -0.094356091  0.006306507  1.352679e-03
## HasSwimmingPool               0.042156280  0.004500461  1.122308e-03
##                          RoomRent   StarRating       Airport      FreeWifi
## Population           -0.088728063  0.134136593 -0.2597010198  1.129334e-01
## CityRank              0.093985529 -0.133381013  0.5059119892 -1.214309e-01
## IsMetroCity          -0.066839771  0.077602866 -0.2073586125  8.682887e-02
## IsTouristDestination  0.122502963 -0.040554998  0.1944220492 -6.156882e-02
## IsWeekend             0.004580134  0.006378436 -0.0027247555  2.960828e-03
## IsNewYearEve          0.038491227  0.002360897  0.0004598872  2.787472e-05
## RoomRent              1.000000000  0.369373425  0.0496532442  3.627002e-03
## StarRating            0.369373425  1.000000000 -0.0609191837  1.800959e-02
## Airport               0.049653244 -0.060919184  1.0000000000 -9.452368e-02
## FreeWifi              0.003627002  0.018009594 -0.0945236768  1.000000e+00
## FreeBreakfast        -0.010006370 -0.032892463  0.0242839409  1.582206e-01
## HotelCapacity         0.157873308  0.637430337 -0.1176720722 -8.703612e-03
## HasSwimmingPool       0.311657734  0.618214699 -0.1416665606 -2.407405e-02
##                      FreeBreakfast HotelCapacity HasSwimmingPool
## Population             0.036427824   0.259983052     0.026259082
## CityRank              -0.008683750  -0.256119706    -0.102973752
## IsMetroCity            0.051385662   0.187150215     0.021411924
## IsTouristDestination  -0.071692559  -0.094356091     0.042156280
## IsWeekend             -0.007612777   0.006306507     0.004500461
## IsNewYearEve          -0.002606416   0.001352679     0.001122308
## RoomRent              -0.010006370   0.157873308     0.311657734
## StarRating            -0.032892463   0.637430337     0.618214699
## Airport                0.024283941  -0.117672072    -0.141666561
## FreeWifi               0.158220597  -0.008703612    -0.024074046
## FreeBreakfast          1.000000000  -0.087165446    -0.061522132
## HotelCapacity         -0.087165446   1.000000000     0.509045809
## HasSwimmingPool       -0.061522132   0.509045809     1.000000000

Corrgram

library(corrgram)
# Corrgram of the correlation matrix. Of the 19 columns, drop CityName (1),
# Date (8), HotelName (9), HotelAddress (13), HotelPincode (14) and
# HotelDescription (15); the remaining columns are 2:7, 10:12 and 16:19.
corrgram(cor(hotel.df[, -c(1, 8:9, 13:15)]))

Scatter plot matrix

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Draw the scatter plot matrix from the observations themselves. Passing
# cor(...) here plotted the 13 rows of the correlation matrix (one point per
# coefficient) instead of the hotel records.
# The 0/1 indicator columns will show up as two bands of points.
scatterplotMatrix(hotel.df[, c(2:7, 10:12, 16:19)])

Pearson's Correlation Test

# Pearson correlation test between the metro-city indicator and room rent.
cor.test(~ IsMetroCity + RoomRent, data = hotel.df)
## 
##  Pearson's product-moment correlation
## 
## data:  IsMetroCity and RoomRent
## t = -7.7053, df = 13230, p-value = 1.399e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08378329 -0.04985761
## sample estimates:
##         cor 
## -0.06683977
# Pearson correlation test between room rent and star rating.
cor.test(~ RoomRent + StarRating, data = hotel.df)
## 
##  Pearson's product-moment correlation
## 
## data:  RoomRent and StarRating
## t = 45.719, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3545660 0.3839956
## sample estimates:
##       cor 
## 0.3693734
# Pearson correlation test between the weekend indicator and room rent.
cor.test(~ IsWeekend + RoomRent, data = hotel.df)
## 
##  Pearson's product-moment correlation
## 
## data:  IsWeekend and RoomRent
## t = 0.52682, df = 13230, p-value = 0.5983
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.01245978  0.02161739
## sample estimates:
##         cor 
## 0.004580134
# Pearson correlation test between the tourist-destination indicator and
# room rent.
cor.test(~ IsTouristDestination + RoomRent, data = hotel.df)
## 
##  Pearson's product-moment correlation
## 
## data:  IsTouristDestination and RoomRent
## t = 14.197, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1056846 0.1392512
## sample estimates:
##      cor 
## 0.122503

T-Test

# Welch two-sample t-test on the two columns: the null hypothesis is that
# mean(RoomRent) equals mean(StarRating).
# NOTE(review): the two variables are on very different scales (means of about
# 5474 vs 3.46), so a difference in means says nothing about association; the
# cor.test(RoomRent, StarRating) call is the relevant test for that.
t.test(hotel.df$RoomRent,hotel.df$StarRating)
## 
##  Welch Two Sample t-test
## 
## data:  hotel.df$RoomRent and hotel.df$StarRating
## t = 85.813, df = 13231, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  5345.575 5595.491
## sample estimates:
##   mean of x   mean of y 
## 5473.991838    3.458933
# Welch two-sample t-test on the two columns: the null hypothesis is that
# mean(RoomRent) equals mean(HotelCapacity).
# NOTE(review): the two variables are on very different scales (means of about
# 5474 vs 62.5), so a difference in means says nothing about association;
# a correlation test (cor.test) would address that question.
t.test(hotel.df$RoomRent,hotel.df$HotelCapacity)
## 
##  Welch Two Sample t-test
## 
## data:  hotel.df$RoomRent and hotel.df$HotelCapacity
## t = 84.882, df = 13234, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  5286.515 5536.445
## sample estimates:
##  mean of x  mean of y 
## 5473.99184   62.51164
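# The p-value is < 2.2e-16 (< 0.05), so we reject the null hypothesis that the mean of RoomRent equals the mean of HotelCapacity. Note that this two-sample t-test only compares the means of the two variables; it does not by itself show a relationship between room rent and hotel capacity. The correlation matrix above gives r = 0.158 for RoomRent and HotelCapacity, i.e. a weak positive association.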