#Project : Various Factors affecting hotel room rent
 #Email : namitg16@gmail.com
#College : IIT Kanpur

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the hotel pricing data set. Text columns are read as factors explicitly:
# the factor-level counts printed by summary() and str() in this report depend
# on it, and since R 4.0.0 read.csv() no longer converts strings to factors by
# default.
hotels <- read.csv("Cities42.csv", stringsAsFactors = TRUE)
# Per-column overview: quantiles for numeric columns, level counts for factors.
summary(hotels)
##        X              CityName      Population          CityRank    
##  Min.   :    1   Delhi    :2048   Min.   :    8096   Min.   : 0.00  
##  1st Qu.: 3309   Jaipur   : 768   1st Qu.:  744983   1st Qu.: 2.00  
##  Median : 6616   Mumbai   : 712   Median : 3046163   Median : 9.00  
##  Mean   : 6616   Bangalore: 656   Mean   : 4416837   Mean   :14.83  
##  3rd Qu.: 9924   Goa      : 624   3rd Qu.: 8443675   3rd Qu.:24.00  
##  Max.   :13232   Kochi    : 608   Max.   :12442373   Max.   :44.00  
##                  (Other)  :7816                                     
##   IsMetroCity     IsTouristDestination   IsWeekend       IsNewYearEve   
##  Min.   :0.0000   Min.   :0.0000       Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000       1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000       Median :1.0000   Median :0.0000  
##  Mean   :0.2842   Mean   :0.6972       Mean   :0.6228   Mean   :0.1244  
##  3rd Qu.:1.0000   3rd Qu.:1.0000       3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000       Max.   :1.0000   Max.   :1.0000  
##                                                                         
##           Date                       HotelName        RoomRent     
##  Dec 21 2016:1611   Vivanta by Taj        :   32   Min.   :   299  
##  Dec 24 2016:1611   Goldfinch Hotel       :   24   1st Qu.:  2436  
##  Dec 25 2016:1611   OYO Rooms             :   24   Median :  4000  
##  Dec 28 2016:1611   The Gordon House Hotel:   24   Mean   :  5474  
##  Dec 31 2016:1611   Apnayt Villa          :   16   3rd Qu.:  6299  
##  Dec 18 2016:1608   Bentleys Hotel Colaba :   16   Max.   :322500  
##  (Other)    :3569   (Other)               :13096                   
##    StarRating       Airport      
##  Min.   :0.000   Min.   :  0.20  
##  1st Qu.:3.000   1st Qu.:  8.40  
##  Median :3.000   Median : 15.00  
##  Mean   :3.459   Mean   : 21.16  
##  3rd Qu.:4.000   3rd Qu.: 24.00  
##  Max.   :5.000   Max.   :124.00  
##                                  
##                                                                    HotelAddress  
##  The Mall, Shimla                                                        :   32  
##  #2-91/14/8, White Fields, Kondapur, Hitech City, Hyderabad, 500084 India:   16  
##  121, City Terrace, Walchand Hirachand Marg, Mumbai, Maharashtra         :   16  
##  14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta               :   16  
##  144/7, Rajiv Gandi Salai (OMR), Kottivakkam, Chennai, Tamil Nadu        :   16  
##  17, Oliver Road, Colaba, Mumbai, Maharashtra                            :   16  
##  (Other)                                                                 :13120  
##   HotelPincode         HotelDescription    FreeWifi      FreeBreakfast   
##  Min.   : 100025   3           :  120   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 221001   Abc         :  112   1st Qu.:1.0000   1st Qu.:0.0000  
##  Median : 395003   3-star hotel:  104   Median :1.0000   Median :1.0000  
##  Mean   : 397430   3.5         :   88   Mean   :0.9259   Mean   :0.6491  
##  3rd Qu.: 570001   4           :   72   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :7000157   (Other)     :12728   Max.   :1.0000   Max.   :1.0000  
##                    NA's        :    8                                    
##  HotelCapacity    HasSwimmingPool 
##  Min.   :  0.00   Min.   :0.0000  
##  1st Qu.: 16.00   1st Qu.:0.0000  
##  Median : 34.00   Median :0.0000  
##  Mean   : 62.51   Mean   :0.3558  
##  3rd Qu.: 75.00   3rd Qu.:1.0000  
##  Max.   :600.00   Max.   :1.0000  
## 
# Compact structure listing: type and leading values of every column.
str(object = hotels)
## 'data.frame':    13232 obs. of  20 variables:
##  $ X                   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CityName            : Factor w/ 42 levels "Agra","Ahmedabad",..: 26 26 26 26 26 26 26 26 26 26 ...
##  $ Population          : int  12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 ...
##  $ CityRank            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ IsMetroCity         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ IsTouristDestination: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ IsWeekend           : int  1 0 1 1 0 1 0 1 1 0 ...
##  $ IsNewYearEve        : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ Date                : Factor w/ 20 levels "18-Dec-16","21-Dec-16",..: 11 12 13 14 15 16 17 18 11 12 ...
##  $ HotelName           : Factor w/ 1670 levels "14 Square Amanora",..: 1635 1635 1635 1635 1635 1635 1635 1635 1409 1409 ...
##  $ RoomRent            : int  12375 10250 9900 10350 12000 11475 11220 9225 6800 9350 ...
##  $ StarRating          : num  5 5 5 5 5 5 5 5 4 4 ...
##  $ Airport             : num  21 21 21 21 21 21 21 21 20 20 ...
##  $ HotelAddress        : Factor w/ 2108 levels " H.P. High Court Mall Road, Shimla",..: 925 928 930 933 935 937 940 941 699 746 ...
##  $ HotelPincode        : int  400005 400006 400007 400008 400009 400010 400011 400012 400039 400040 ...
##  $ HotelDescription    : Factor w/ 1226 levels "#NAME?","10 star hotel near Queensroad, Amritsar",..: 1030 1030 1030 1030 1030 1030 1030 1030 1006 1006 ...
##  $ FreeWifi            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ FreeBreakfast       : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ HotelCapacity       : int  287 287 287 287 287 287 287 287 28 28 ...
##  $ HasSwimmingPool     : int  1 1 1 1 1 1 1 1 0 0 ...

Including Plots

You can also embed plots, for example:

## Warning: package 'psych' was built under R version 3.4.3
##                      vars     n       mean         sd    median    trimmed
## X                       1 13232    6616.50    3819.89    6616.5    6616.50
## CityName*               2 13232      18.07      11.72      16.0      17.29
## Population              3 13232 4416836.87 4258386.00 3046163.0 4040816.22
## CityRank                4 13232      14.83      13.51       9.0      13.30
## IsMetroCity             5 13232       0.28       0.45       0.0       0.23
## IsTouristDestination    6 13232       0.70       0.46       1.0       0.75
## IsWeekend               7 13232       0.62       0.48       1.0       0.65
## IsNewYearEve            8 13232       0.12       0.33       0.0       0.03
## Date*                   9 13232      14.26       2.82      14.0      14.39
## HotelName*             10 13232     841.19     488.16     827.0     841.18
## RoomRent               11 13232    5473.99    7333.12    4000.0    4383.33
## StarRating             12 13232       3.46       0.76       3.0       3.40
## Airport                13 13232      21.16      22.76      15.0      16.39
## HotelAddress*          14 13232    1202.53     582.17    1261.0    1233.25
## HotelPincode           15 13232  397430.26  259837.50  395003.0  388540.47
## HotelDescription*      16 13224     581.34     363.26     567.0     575.37
## FreeWifi               17 13232       0.93       0.26       1.0       1.00
## FreeBreakfast          18 13232       0.65       0.48       1.0       0.69
## HotelCapacity          19 13232      62.51      76.66      34.0      46.03
## HasSwimmingPool        20 13232       0.36       0.48       0.0       0.32
##                             mad      min      max      range  skew
## X                       4904.44      1.0    13232    13231.0  0.00
## CityName*                 11.86      1.0       42       41.0  0.48
## Population           3846498.95   8096.0 12442373 12434277.0  0.68
## CityRank                  11.86      0.0       44       44.0  0.69
## IsMetroCity                0.00      0.0        1        1.0  0.96
## IsTouristDestination       0.00      0.0        1        1.0 -0.86
## IsWeekend                  0.00      0.0        1        1.0 -0.51
## IsNewYearEve               0.00      0.0        1        1.0  2.28
## Date*                      2.97      1.0       20       19.0 -1.05
## HotelName*               641.97      1.0     1670     1669.0  0.01
## RoomRent                2653.85    299.0   322500   322201.0 16.75
## StarRating                 0.74      0.0        5        5.0  0.48
## Airport                   11.12      0.2      124      123.8  2.73
## HotelAddress*            668.65      1.0     2108     2107.0 -0.37
## HotelPincode          257975.37 100025.0  7000157  6900132.0  9.99
## HotelDescription*        472.95      1.0     1226     1225.0  0.11
## FreeWifi                   0.00      0.0        1        1.0 -3.25
## FreeBreakfast              0.00      0.0        1        1.0 -0.62
## HotelCapacity             28.17      0.0      600      600.0  2.95
## HasSwimmingPool            0.00      0.0        1        1.0  0.60
##                      kurtosis       se
## X                       -1.20    33.21
## CityName*               -0.88     0.10
## Population              -1.08 37019.65
## CityRank                -0.76     0.12
## IsMetroCity             -1.08     0.00
## IsTouristDestination    -1.26     0.00
## IsWeekend               -1.74     0.00
## IsNewYearEve             3.18     0.00
## Date*                    2.93     0.02
## HotelName*              -1.25     4.24
## RoomRent               582.06    63.75
## StarRating               0.25     0.01
## Airport                  7.89     0.20
## HotelAddress*           -0.88     5.06
## HotelPincode           249.76  2258.86
## HotelDescription*       -1.25     3.16
## FreeWifi                 8.57     0.00
## FreeBreakfast           -1.61     0.00
## HotelCapacity           11.39     0.67
## HasSwimmingPool         -1.64     0.00
          One way contingency tables
# Count of observations at each CityRank value.
table1 <- xtabs(formula = ~ CityRank, data = hotels)
print(table1)
## CityRank
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
##  712 2048  656  416  536  424  512   80  600  768   32  128   16  136  160 
##   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
##  432  448  624  128  264   40  224  336  392   48  160  120  272  104  456 
##   32   33   34   35   36   37   38   39   40   42   43   44 
##   48   56  280   64  136   88  128  136  264  144  328  288
# Count of observations by the IsMetroCity flag.
table2 <- xtabs(formula = ~ IsMetroCity, data = hotels)
print(table2)
## IsMetroCity
##    0    1 
## 9472 3760
# Count of observations by the FreeWifi flag.
table3 <- xtabs(formula = ~ FreeWifi, data = hotels)
print(table3)
## FreeWifi
##     0     1 
##   981 12251
# Count of observations by the IsWeekend flag.
table4 <- xtabs(formula = ~ IsWeekend, data = hotels)
print(table4)
## IsWeekend
##    0    1 
## 4991 8241
                        Two Way Contingency Tables
              
# Cross-tabulation: HasSwimmingPool in rows, StarRating in columns.
table5 <- xtabs(formula = ~ HasSwimmingPool + StarRating, data = hotels)
print(table5)
##                StarRating
## HasSwimmingPool    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7
##               0    8    8  392  616 5236    0   16    0 1272    0    0
##               1    8    0   48   16  717    8    0    8  480    8   24
##                StarRating
## HasSwimmingPool  3.8  3.9    4  4.1  4.3  4.4  4.5  4.7  4.8    5
##               0    8    8  848    8    0    8   48    0    0   48
##               1    8   24 1615   16   16    0  328    8   16 1360
# Cross-tabulation: FreeWifi in rows, StarRating in columns.
table6 <- xtabs(formula = ~ FreeWifi + StarRating, data = hotels)
print(table6)
##         StarRating
## FreeWifi    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9
##        0    0    0   80  104  336    0    0    0   96    0    0    0    0
##        1   16    8  360  528 5617    8   16    8 1656    8   24   16   32
##         StarRating
## FreeWifi    4  4.1  4.3  4.4  4.5  4.7  4.8    5
##        0  231    0    0    0   24    0    0  110
##        1 2232   24   16    8  352    8   16 1298
# Room rent distribution; the x axis is cut at 50000 so that the long right
# tail (maximum 322500 in the summary) does not flatten the bulk of the data.
hist(
  hotels$RoomRent,
  breaks = 200,
  xlim = c(0, 50000),
  main = "Room Rent for different rooms",
  xlab = "rent",
  col = "red"
)

# Star rating distribution.
hist(
  hotels$StarRating,
  breaks = 5,
  main = "Star ratings",
  xlab = "Rating of hotels",
  col = "green"
)

# Population of the city in which each observation's hotel is located.
hist(
  hotels$Population,
  main = "Population of city containing the hotel ",
  xlab = "People in the city",
  col = "yellow"
)

# Values of the Airport column (labelled as a distance in km).
hist(
  hotels$Airport,
  breaks = 12,
  main = "Distance from airport",
  xlab = "Distance in kms",
  col = "blue"
)

# Star rating distribution for each value of the FreeWifi flag.
# The plot is horizontal, so the star rating runs along the x axis and the
# FreeWifi groups along the y axis; title and axis labels describe exactly
# that (they previously referred to free breakfast and city name).
boxplot(
  StarRating ~ FreeWifi,
  data = hotels,
  main = "Star rating of hotels by free Wifi availability",
  xlab = "Star rating",
  ylab = "Free Wifi indicator",
  col = "purple",
  horizontal = TRUE
)

# Distribution of the HotelCapacity column.
hist(
  hotels$HotelCapacity,
  main = "Capacity of hotels",
  xlab = "Capacity",
  ylab = "No of hotels",
  col = "red"
)

# Numeric-only view of the data, used for the correlation analysis.
# Columns are selected by name rather than by position so that a change in the
# CSV column order cannot silently pull in the wrong variables. The selection
# and its order match the former positional indices
# (3:8, 11:13, 15, 17:20 of the str(hotels) listing).
hotels1 <- subset(
  hotels,
  select = c(
    Population, CityRank, IsMetroCity, IsTouristDestination,
    IsWeekend, IsNewYearEve, RoomRent, StarRating, Airport,
    HotelPincode, FreeWifi, FreeBreakfast, HotelCapacity, HasSwimmingPool
  )
)
str(hotels1)
## 'data.frame':    13232 obs. of  14 variables:
##  $ Population          : int  12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 ...
##  $ CityRank            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ IsMetroCity         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ IsTouristDestination: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ IsWeekend           : int  1 0 1 1 0 1 0 1 1 0 ...
##  $ IsNewYearEve        : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ RoomRent            : int  12375 10250 9900 10350 12000 11475 11220 9225 6800 9350 ...
##  $ StarRating          : num  5 5 5 5 5 5 5 5 4 4 ...
##  $ Airport             : num  21 21 21 21 21 21 21 21 20 20 ...
##  $ HotelPincode        : int  400005 400006 400007 400008 400009 400010 400011 400012 400039 400040 ...
##  $ FreeWifi            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ FreeBreakfast       : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ HotelCapacity       : int  287 287 287 287 287 287 287 287 28 28 ...
##  $ HasSwimmingPool     : int  1 1 1 1 1 1 1 1 0 0 ...
# Pairwise correlation matrix of the numeric columns, rounded to two decimals.
round(cor(hotels1), digits = 2)
##                      Population CityRank IsMetroCity IsTouristDestination
## Population                 1.00    -0.84        0.77                -0.05
## CityRank                  -0.84     1.00       -0.56                 0.28
## IsMetroCity                0.77    -0.56        1.00                 0.18
## IsTouristDestination      -0.05     0.28        0.18                 1.00
## IsWeekend                  0.01    -0.01        0.00                -0.02
## IsNewYearEve               0.00     0.00        0.00                 0.00
## RoomRent                  -0.09     0.09       -0.07                 0.12
## StarRating                 0.13    -0.13        0.08                -0.04
## Airport                   -0.26     0.51       -0.21                 0.19
## HotelPincode              -0.26     0.14       -0.18                -0.17
## FreeWifi                   0.11    -0.12        0.09                -0.06
## FreeBreakfast              0.04    -0.01        0.05                -0.07
## HotelCapacity              0.26    -0.26        0.19                -0.09
## HasSwimmingPool            0.03    -0.10        0.02                 0.04
##                      IsWeekend IsNewYearEve RoomRent StarRating Airport
## Population                0.01         0.00    -0.09       0.13   -0.26
## CityRank                 -0.01         0.00     0.09      -0.13    0.51
## IsMetroCity               0.00         0.00    -0.07       0.08   -0.21
## IsTouristDestination     -0.02         0.00     0.12      -0.04    0.19
## IsWeekend                 1.00         0.29     0.00       0.01    0.00
## IsNewYearEve              0.29         1.00     0.04       0.00    0.00
## RoomRent                  0.00         0.04     1.00       0.37    0.05
## StarRating                0.01         0.00     0.37       1.00   -0.06
## Airport                   0.00         0.00     0.05      -0.06    1.00
## HotelPincode             -0.01         0.00     0.01      -0.01    0.22
## FreeWifi                  0.00         0.00     0.00       0.02   -0.09
## FreeBreakfast            -0.01         0.00    -0.01      -0.03    0.02
## HotelCapacity             0.01         0.00     0.16       0.64   -0.12
## HasSwimmingPool           0.00         0.00     0.31       0.62   -0.14
##                      HotelPincode FreeWifi FreeBreakfast HotelCapacity
## Population                  -0.26     0.11          0.04          0.26
## CityRank                     0.14    -0.12         -0.01         -0.26
## IsMetroCity                 -0.18     0.09          0.05          0.19
## IsTouristDestination        -0.17    -0.06         -0.07         -0.09
## IsWeekend                   -0.01     0.00         -0.01          0.01
## IsNewYearEve                 0.00     0.00          0.00          0.00
## RoomRent                     0.01     0.00         -0.01          0.16
## StarRating                  -0.01     0.02         -0.03          0.64
## Airport                      0.22    -0.09          0.02         -0.12
## HotelPincode                 1.00    -0.01          0.02         -0.04
## FreeWifi                    -0.01     1.00          0.16         -0.01
## FreeBreakfast                0.02     0.16          1.00         -0.09
## HotelCapacity               -0.04    -0.01         -0.09          1.00
## HasSwimmingPool              0.02    -0.02         -0.06          0.51
##                      HasSwimmingPool
## Population                      0.03
## CityRank                       -0.10
## IsMetroCity                     0.02
## IsTouristDestination            0.04
## IsWeekend                       0.00
## IsNewYearEve                    0.00
## RoomRent                        0.31
## StarRating                      0.62
## Airport                        -0.14
## HotelPincode                    0.02
## FreeWifi                       -0.02
## FreeBreakfast                  -0.06
## HotelCapacity                   0.51
## HasSwimmingPool                 1.00
# Correlation of RoomRent with every column of hotels1 (one-row matrix).
cor(x = hotels1$RoomRent, y = hotels1)
##       Population   CityRank IsMetroCity IsTouristDestination   IsWeekend
## [1,] -0.08872806 0.09398553 -0.06683977             0.122503 0.004580134
##      IsNewYearEve RoomRent StarRating    Airport HotelPincode    FreeWifi
## [1,]   0.03849123        1  0.3693734 0.04965324  0.009262712 0.003627002
##      FreeBreakfast HotelCapacity HasSwimmingPool
## [1,]   -0.01000637     0.1578733       0.3116577
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram of the numeric variables: shaded cells below the diagonal, pie
# glyphs above it, variable names on the diagonal.
# hotels1 (the numeric subset used for cor() above) is plotted instead of the
# raw hotels frame so that the row-index column X is not shown as if it were
# a factor affecting rent.
corrgram(
  hotels1,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Correlation of various factors"
)

library(car)
## Warning: package 'car' was built under R version 3.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatterplot matrix of FreeWifi, FreeBreakfast, StarRating and RoomRent.
scatterplotMatrix(
  formula = ~ FreeWifi + FreeBreakfast + StarRating + RoomRent,
  data = hotels,
  main = "Scatterplots of all variables"
)

T-Test

# Two-sample t-test comparing mean RoomRent between the two FreeWifi groups.
t.test(RoomRent ~ FreeWifi, data = hotels)
## 
##  Welch Two Sample t-test
## 
## data:  RoomRent by FreeWifi
## t = -0.76847, df = 1804.7, p-value = 0.4423
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -360.5977  157.5701
## sample estimates:
## mean in group 0 mean in group 1 
##        5380.004        5481.518