# Load the hotel pricing data (Cities42.csv) into hotel.df.
# NOTE(review): the absolute path is machine-specific. setwd() is kept because
# code outside this section may rely on the working directory; a
# project-relative path would be more portable.
setwd("C:/Users/Dell/Downloads/Sameer Mathur")
# Read text columns as factors explicitly. The recorded summary() output
# further down tabulates CityName, HotelName, etc. by level, which only
# happens when strings are factors -- the read.csv() default before R 4.0.0,
# but no longer the default afterwards.
hotel.df <- read.csv("Cities42.csv", stringsAsFactors = TRUE)
# View() opens an interactive data viewer; skip it in batch / knitr runs.
if (interactive()) {
  View(hotel.df)
}
# Normalise the mixed date spellings in hotel.df$Date to one canonical
# "Mon DD YYYY" form, then tabulate the result.
#
# Every pattern is anchored to the whole value and accepts an optional leading
# zero on the day. The earlier unanchored gsub("4-Jan-17", ...) also matched
# inside "04-Jan-17" and left the stray "0" behind, which produced bogus
# values such as "0Jan 04 2017" and "0Jan 08 2017".
# NOTE: recorded table()/levels()/summary() output elsewhere in this file was
# produced before this fix and still lists those "0Jan ..." values.
# NOTE(review): "-Jan-16" spellings are mapped to 2017, exactly as the
# original replacements did -- presumably a year typo in the raw data; confirm.
date_patterns <- c(
  "Dec 18 2016" = "^18-Dec-16$",
  "Dec 21 2016" = "^21-Dec-16$",
  "Dec 24 2016" = "^24-Dec-16$",
  "Dec 25 2016" = "^25-Dec-16$",
  "Dec 28 2016" = "^28-Dec-16$",
  "Dec 31 2016" = "^31-Dec-16$",
  "Jan 04 2017" = "^(0?4-Jan-1[67]|Jan 0?4 2017)$",
  "Jan 08 2017" = "^(0?8-Jan-1[67]|Jan 0?8 2017)$"
)
# as.character() so the replacement works whether Date was read as a factor
# or as a character vector.
date_values <- as.character(hotel.df$Date)
for (canonical in names(date_patterns)) {
  # grepl() returns FALSE for NA, so missing dates stay NA.
  date_values[grepl(date_patterns[[canonical]], date_values)] <- canonical
}
hotel.df$Date <- date_values
table(hotel.df$Date)
## 
## 0Jan 04 2017 0Jan 08 2017  Dec 18 2016  Dec 21 2016  Dec 24 2016 
##           44           44         1652         1655         1655 
##  Dec 25 2016  Dec 28 2016  Dec 31 2016  Jan 04 2017  Jan 08 2017 
##         1655         1655         1655         1608         1609
# Store the cleaned dates as a factor, confirm the conversion, list the
# resulting levels, and then summarise every column of the data frame.
hotel.df[["Date"]] <- factor(hotel.df[["Date"]])
is.factor(hotel.df[["Date"]])
## [1] TRUE
levels(hotel.df[["Date"]])
##  [1] "0Jan 04 2017" "0Jan 08 2017" "Dec 18 2016"  "Dec 21 2016" 
##  [5] "Dec 24 2016"  "Dec 25 2016"  "Dec 28 2016"  "Dec 31 2016" 
##  [9] "Jan 04 2017"  "Jan 08 2017"
summary(object = hotel.df)
##       CityName      Population          CityRank      IsMetroCity    
##  Delhi    :2048   Min.   :    8096   Min.   : 0.00   Min.   :0.0000  
##  Jaipur   : 768   1st Qu.:  744983   1st Qu.: 2.00   1st Qu.:0.0000  
##  Mumbai   : 712   Median : 3046163   Median : 9.00   Median :0.0000  
##  Bangalore: 656   Mean   : 4416837   Mean   :14.83   Mean   :0.2842  
##  Goa      : 624   3rd Qu.: 8443675   3rd Qu.:24.00   3rd Qu.:1.0000  
##  Kochi    : 608   Max.   :12442373   Max.   :44.00   Max.   :1.0000  
##  (Other)  :7816                                                      
##  IsTouristDestination   IsWeekend       IsNewYearEve             Date     
##  Min.   :0.0000       Min.   :0.0000   Min.   :0.0000   Dec 21 2016:1655  
##  1st Qu.:0.0000       1st Qu.:0.0000   1st Qu.:0.0000   Dec 24 2016:1655  
##  Median :1.0000       Median :1.0000   Median :0.0000   Dec 25 2016:1655  
##  Mean   :0.6972       Mean   :0.6228   Mean   :0.1244   Dec 28 2016:1655  
##  3rd Qu.:1.0000       3rd Qu.:1.0000   3rd Qu.:0.0000   Dec 31 2016:1655  
##  Max.   :1.0000       Max.   :1.0000   Max.   :1.0000   Dec 18 2016:1652  
##                                                         (Other)    :3305  
##                   HotelName        RoomRent        StarRating   
##  Vivanta by Taj        :   32   Min.   :   299   Min.   :0.000  
##  Goldfinch Hotel       :   24   1st Qu.:  2436   1st Qu.:3.000  
##  OYO Rooms             :   24   Median :  4000   Median :3.000  
##  The Gordon House Hotel:   24   Mean   :  5474   Mean   :3.459  
##  Apnayt Villa          :   16   3rd Qu.:  6299   3rd Qu.:4.000  
##  Bentleys Hotel Colaba :   16   Max.   :322500   Max.   :5.000  
##  (Other)               :13096                                   
##     Airport      
##  Min.   :  0.20  
##  1st Qu.:  8.40  
##  Median : 15.00  
##  Mean   : 21.16  
##  3rd Qu.: 24.00  
##  Max.   :124.00  
##                  
##                                                                    HotelAddress  
##  The Mall, Shimla                                                        :   32  
##  #2-91/14/8, White Fields, Kondapur, Hitech City, Hyderabad, 500084 India:   16  
##  121, City Terrace, Walchand Hirachand Marg, Mumbai, Maharashtra         :   16  
##  14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta               :   16  
##  144/7, Rajiv Gandi Salai (OMR), Kottivakkam, Chennai, Tamil Nadu        :   16  
##  17, Oliver Road, Colaba, Mumbai, Maharashtra                            :   16  
##  (Other)                                                                 :13120  
##   HotelPincode         HotelDescription    FreeWifi      FreeBreakfast   
##  Min.   : 100025   3           :  120   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 221001   Abc         :  112   1st Qu.:1.0000   1st Qu.:0.0000  
##  Median : 395003   3-star hotel:  104   Median :1.0000   Median :1.0000  
##  Mean   : 397430   3.5         :   88   Mean   :0.9259   Mean   :0.6491  
##  3rd Qu.: 570001   4           :   72   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :7000157   (Other)     :12728   Max.   :1.0000   Max.   :1.0000  
##                    NA's        :    8                                    
##  HotelCapacity    HasSwimmingPool 
##  Min.   :  0.00   Min.   :0.0000  
##  1st Qu.: 16.00   1st Qu.:0.0000  
##  Median : 34.00   Median :0.0000  
##  Mean   : 62.51   Mean   :0.3558  
##  3rd Qu.: 75.00   3rd Qu.:1.0000  
##  Max.   :600.00   Max.   :1.0000  
## 
# Descriptive statistics for every column via psych::describe().
# The package stays attached in case later code uses other psych functions.
library(psych)
psych::describe(hotel.df)
# Count of rows per IsMetroCity value; the parentheses store and print it.
(metro <- table(hotel.df[["IsMetroCity"]]))
## 
##    0    1 
## 9472 3760
# Count of rows per IsTouristDestination value; stored and printed.
(tourist <- table(hotel.df[["IsTouristDestination"]]))
## 
##    0    1 
## 4007 9225
# Count of rows per IsWeekend value; stored and printed.
(weekend <- table(hotel.df[["IsWeekend"]]))
## 
##    0    1 
## 4991 8241
# Count of rows per IsNewYearEve value; stored and printed.
(newyear <- table(hotel.df[["IsNewYearEve"]]))
## 
##     0     1 
## 11586  1646
# Count of rows per Date level; stored and printed.
(date <- table(hotel.df[["Date"]]))
## 
## 0Jan 04 2017 0Jan 08 2017  Dec 18 2016  Dec 21 2016  Dec 24 2016 
##           44           44         1652         1655         1655 
##  Dec 25 2016  Dec 28 2016  Dec 31 2016  Jan 04 2017  Jan 08 2017 
##         1655         1655         1655         1608         1609
# Count of rows per distinct StarRating value; stored and printed.
(starrating <- table(hotel.df[["StarRating"]]))
## 
##    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9    4  4.1 
##   16    8  440  632 5953    8   16    8 1752    8   24   16   32 2463   24 
##  4.3  4.4  4.5  4.7  4.8    5 
##   16    8  376    8   16 1408
# Count of rows per distinct Airport value; stored and printed.
(airport <- table(hotel.df[["Airport"]]))
## 
##   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9     1   1.1   1.2   1.4 
##    16    32    40    24    32    24     8    39    32    16    40     8 
##   1.5   1.6   1.7   1.8   1.9     2   2.1   2.2   2.3   2.4   2.5   2.6 
##    16    32    22    72    40    56    40    24    16    32    56    48 
##   2.7   2.8   2.9     3   3.1   3.2   3.3   3.4   3.5   3.6   3.7   3.8 
##    56    24    56    56    16    24    16    48    56    64    16    40 
##   3.9     4   4.1   4.2   4.3   4.4   4.5   4.6   4.7   4.8   4.9     5 
##    32    72    32    40    32    32    24    40    24    40    32    73 
##   5.1   5.2   5.3   5.4   5.5   5.6   5.7   5.8   5.9     6   6.1   6.2 
##    72    72    32    40    48    40    32    56    40    33    32    64 
##   6.3   6.4   6.5   6.6   6.7   6.8   6.9     7   7.1   7.2   7.3   7.4 
##    16    48    48    40    24    56    40    49    72    48    24    40 
##   7.5   7.6   7.7   7.8   7.9     8   8.1   8.2   8.3   8.4   8.5   8.6 
##    48    71    48    32    72    73    72    56    40    48    64    16 
##   8.7   8.8   8.9     9   9.1   9.2   9.3   9.4   9.5   9.6   9.7   9.8 
##    56    16    16    49    24    62    48    80    22    40    24    40 
##   9.9    10  10.2  10.3  10.4  10.6  10.7  10.8  10.9    11  11.1  11.3 
##    56   298     8     8     8     8     8     8    16   610    16    16 
##  11.7  11.9    12  12.2  12.3  12.6  12.7    13  13.1  13.3  13.5  13.6 
##     8    16   354    24     8    24    16   319    16     8     8    24 
##  13.7  13.8    14  14.2  14.4  14.5  14.6  14.7  14.8  14.9    15  15.3 
##    16     8   399    16    24     8    16    24    16     8   441    16 
##  15.4  15.6  15.7  15.8  15.9    16  16.1  16.2  16.4  16.5  16.7    17 
##    16     8     8     8     8   409    16     8     8    32    32   313 
##  17.1  17.2  17.4  17.5  17.6  17.8    18  18.3  18.5  18.6  18.7    19 
##     8    16     8    16     8    16   424     8    16     8     8   200 
##  19.5  19.9    20  20.2  20.3  20.5  20.9    21  21.4  21.5    22  22.1 
##     8     8   384     8    16     8     8   248    24     8   305     8 
##  22.2  22.4  22.5    23  23.2  23.3  23.4    24  24.2  24.3  24.5  24.6 
##     8     8     8   304     8    16     8   167    16    16    16     8 
##  24.7  24.9    25  25.6  25.7  25.9    26  26.1  26.3  26.4  26.5  26.7 
##     8    32   208     8     8     8   300     8     8    24     8     8 
##    27  27.1  27.2    28  28.1  28.6  28.7    29    30  30.5    31  31.2 
##   272     8     8   112     8     8     8    88    56     8   224     8 
##  31.3  31.9    32  32.9    33  33.4    34    35    36  36.2    37    38 
##    16     8    72     8    40    16    16    49    17     8    49    49 
##  38.3    39  39.9    40    41    42  42.7    43  43.9    44  44.5  44.6 
##     8   100     8    56   102    41    16    33     8     8     8     8 
##  44.8    46    47  47.5    48  48.4    49    50  50.1  50.5    51    52 
##     8    40     8     8    16     8     8     8     8     8    16    16 
##  52.7    53    55  57.2    60    61    62    63  63.5  63.6    65  67.6 
##     8     8     8     8     8    16    32     8     8     8   152     8 
##    69  73.1    80  80.3    81    82    83    84    85    86    87  91.3 
##     8     8     1     8     1     9     1     1     1     1     1     8 
##  96.5   100 102.4   105   110 117.4   124 
##     8   136     8   240    64     8   128
# Count of rows per FreeWifi value; stored and printed.
(wifi <- table(hotel.df[["FreeWifi"]]))
## 
##     0     1 
##   981 12251
# Count of rows per FreeBreakfast value; stored and printed.
(breakfast <- table(hotel.df[["FreeBreakfast"]]))
## 
##    0    1 
## 4643 8589
# Count of rows per distinct HotelCapacity value; stored and printed.
(capacity <- table(hotel.df[["HotelCapacity"]]))
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
##    8   32   16   48   32  104   72   32  152   88  521  120  112   40   96 
##   15   16   17   18   19   20   21   22   23   24   25   26   27   28   29 
## 1782  192  112  168  169  296  176  192  128  289  240  144  160  176   86 
##   30   31   32   33   34   35   36   37   38   39   40   41   42   43   44 
##  296  136  264  104  120  112  168   56  120  144  256   96  144   73  128 
##   45   46   47   48   49   50   51   52   53   54   55   56   57   58   59 
##  144   72   24  160   96  104   40  120   24   72   72   48   40   40   48 
##   60   61   62   63   64   65   66   67   68   69   70   71   72   73   74 
##  128   40   24   40   56   88   40   80   48   48   72   30   72   32   16 
##   75   76   77   78   79   80   81   82   83   84   85   86   87   88   89 
##   32   40   48   40   24   88   32   40   32   24   64    8   32   48    8 
##   90   91   92   93   94   95   96   97   98   99  100  101  102  103  104 
##   40    8   48   24   32   16   16   16   48   32   78   16   40   16   64 
##  106  107  108  109  110  111  112  113  114  115  117  119  120  121  122 
##   48   24   48   16   24   16   64   16   24   32   48   24   16    8   16 
##  124  126  127  128  129  130  132  133  134  135  136  137  138  139  140 
##    8   16   24   24   40   31   56    8    8   16    8   32    8    8   40 
##  141  142  144  145  147  148  149  150  151  153  154  155  159  160  162 
##   32   24   16   24   16    8   24   48   24   15   24   16    8   24    8 
##  164  165  166  167  170  171  172  173  176  177  178  179  180  181  182 
##   24    8    8   16   16    8   24   16    8   16   32   16    8   24    8 
##  183  184  187  189  190  191  195  196  197  198  199  200  201  202  203 
##   16   40    8    8    7    8    8    8    8   16    8   24    8   32    8 
##  204  205  207  208  209  210  211  212  214  215  216  218  220  222  223 
##   16    8    8   24    8    8   24    8   24    8    8   32   24    8    8 
##  228  229  230  231  232  233  234  235  236  237  240  244  247  248  251 
##    8    8    8    8    8    8    8    8    8    8    8   24    8    8    8 
##  254  255  260  261  264  267  269  270  273  279  281  286  287  292  293 
##    8    8    8    8   16    1    8    8    8    8    8    8   16    8    8 
##  302  310  311  317  323  324  326  327  334  340  341  385  390  393  400 
##    8    8    8    6    8    8    8   24    8    8    8    8    8    8    8 
##  403  411  414  419  436  451  461  480  493  507  523  550  560  600 
##    8   16    8    8    8    8    8    8   16    8   16    8   16    8
# Count of rows per HasSwimmingPool value; stored and printed.
(pool <- table(hotel.df[["HasSwimmingPool"]]))
## 
##    0    1 
## 8524 4708
# Bar charts of the star-rating, capacity and swimming-pool frequency tables
# computed earlier in the script.
barplot(height = starrating)

barplot(height = capacity)

barplot(height = pool)

# Scatter plots with RoomRent on the x-axis against star rating, hotel
# capacity and the swimming-pool indicator. The argument expressions are kept
# as written so the default axis labels are unchanged.
plot(x = hotel.df$RoomRent, y = hotel.df$StarRating)

plot(x = hotel.df$RoomRent, y = hotel.df$HotelCapacity)

plot(x = hotel.df$RoomRent, y = hotel.df$HasSwimmingPool)

# Correlogram of hotel.df: shaded cells below the diagonal, pie glyphs above,
# variable names on the diagonal, variables reordered (order = TRUE).
library(corrgram)
corrgram(
  hotel.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Hotel Room Pricing factors"
)

# Welch two-sample t-test: does mean RoomRent differ between hotels without
# (HasSwimmingPool = 0) and with (HasSwimmingPool = 1) a swimming pool?
t.test(RoomRent~HasSwimmingPool,data = hotel.df)
## 
##  Welch Two Sample t-test
## 
## data:  RoomRent by HasSwimmingPool
## t = -29.013, df = 5011.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5096.030 -4450.942
## sample estimates:
## mean in group 0 mean in group 1 
##        3775.566        8549.052
# Association between room rent and hotel capacity.
# A two-sample t.test() on these two columns compares the mean rent with the
# mean capacity -- quantities in different units -- so its p-value says nothing
# about whether rent varies with capacity. A correlation test answers that.
# NOTE: the recorded output just below was produced by the earlier
# t.test(hotel.df$RoomRent, hotel.df$HotelCapacity) call.
cor.test(hotel.df$RoomRent, hotel.df$HotelCapacity)
## 
##  Welch Two Sample t-test
## 
## data:  hotel.df$RoomRent and hotel.df$HotelCapacity
## t = 84.882, df = 13234, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  5286.515 5536.445
## sample estimates:
##  mean of x  mean of y 
## 5473.99184   62.51164
# Welch two-sample t-test: does mean RoomRent differ between hotels without
# (FreeBreakfast = 0) and with (FreeBreakfast = 1) free breakfast?
t.test(RoomRent~FreeBreakfast, data = hotel.df)
## 
##  Welch Two Sample t-test
## 
## data:  RoomRent by FreeBreakfast
## t = 0.98095, df = 6212.3, p-value = 0.3267
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -153.5017  460.9935
## sample estimates:
## mean in group 0 mean in group 1 
##        5573.790        5420.044
# Association between room rent and star rating.
# A two-sample t.test() on these two columns compares the mean rent with the
# mean star rating -- quantities on unrelated scales -- so it cannot show
# whether rent rises with rating. A correlation test answers that.
# NOTE: the recorded output just below was produced by the earlier
# t.test(hotel.df$RoomRent, hotel.df$StarRating) call.
cor.test(hotel.df$RoomRent, hotel.df$StarRating)
## 
##  Welch Two Sample t-test
## 
## data:  hotel.df$RoomRent and hotel.df$StarRating
## t = 85.813, df = 13231, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  5345.575 5595.491
## sample estimates:
##   mean of x   mean of y 
## 5473.991838    3.458933
# Welch two-sample t-test: does mean RoomRent differ between non-metro
# (IsMetroCity = 0) and metro (IsMetroCity = 1) cities?
t.test(RoomRent~IsMetroCity, data = hotel.df)
## 
##  Welch Two Sample t-test
## 
## data:  RoomRent by IsMetroCity
## t = 10.721, df = 13224, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   888.0308 1285.4102
## sample estimates:
## mean in group 0 mean in group 1 
##        5782.794        4696.073