PROJECT-TITLE: “Hotel Room Pricing Analysis In The Indian Market.”

NAME: “Kathala Krishna Chaitanya rao.”

EMAIL: “krishnakathala@gmail.com”

COLLEGE: “Jyothismathi Institute of Technology & Sciences, affiliated to JNTUH University.”

             ------------pricing strategy of hotels in the Indian hotel industry----------
# Read the hotel room pricing data from the CSV file.
# stringsAsFactors = TRUE keeps text columns as factors, which is what the
# str() output recorded further down in this file shows (it was the read.csv
# default before R 4.0).
Cities.df <- read.csv("Cities42.csv", stringsAsFactors = TRUE)
# Open the spreadsheet viewer only in an interactive session; View() may be
# unavailable when the script is run non-interactively.
if (interactive()) {
  View(Cities.df)
}
# Summarizing the data
library(psych)
describe(Cities.df)
##                      vars     n       mean         sd  median    trimmed
## CityName*               1 13232      18.07      11.72      16      17.29
## Population              2 13232 4416836.87 4258386.00 3046163 4040816.22
## CityRank                3 13232      14.83      13.51       9      13.30
## IsMetroCity             4 13232       0.28       0.45       0       0.23
## IsTouristDestination    5 13232       0.70       0.46       1       0.75
## IsWeekend               6 13232       0.62       0.48       1       0.65
## IsNewYearEve            7 13232       0.12       0.33       0       0.03
## Date*                   8 13232      14.30       2.69      14      14.39
## HotelName*              9 13232     841.19     488.16     827     841.18
## RoomRent               10 13232    5473.99    7333.12    4000    4383.33
## StarRating             11 13232       3.46       0.76       3       3.40
## Airport                12 13232      21.16      22.76      15      16.39
## HotelAddress*          13 13232    1202.53     582.17    1261    1233.25
## HotelPincode           14 13232  397430.26  259837.50  395003  388540.47
## HotelDescription*      15 13224     581.34     363.26     567     575.37
## FreeWifi               16 13232       0.93       0.26       1       1.00
## FreeBreakfast          17 13232       0.65       0.48       1       0.69
## HotelCapacity          18 13232      62.51      76.66      34      46.03
## HasSwimmingPool        19 13232       0.36       0.48       0       0.32
##                             mad      min      max      range  skew
## CityName*                 11.86      1.0       42       41.0  0.48
## Population           3846498.95   8096.0 12442373 12434277.0  0.68
## CityRank                  11.86      0.0       44       44.0  0.69
## IsMetroCity                0.00      0.0        1        1.0  0.96
## IsTouristDestination       0.00      0.0        1        1.0 -0.86
## IsWeekend                  0.00      0.0        1        1.0 -0.51
## IsNewYearEve               0.00      0.0        1        1.0  2.28
## Date*                      2.97      1.0       20       19.0 -0.77
## HotelName*               641.97      1.0     1670     1669.0  0.01
## RoomRent                2653.85    299.0   322500   322201.0 16.75
## StarRating                 0.74      0.0        5        5.0  0.48
## Airport                   11.12      0.2      124      123.8  2.73
## HotelAddress*            668.65      1.0     2108     2107.0 -0.37
## HotelPincode          257975.37 100025.0  7000157  6900132.0  9.99
## HotelDescription*        472.95      1.0     1226     1225.0  0.11
## FreeWifi                   0.00      0.0        1        1.0 -3.25
## FreeBreakfast              0.00      0.0        1        1.0 -0.62
## HotelCapacity             28.17      0.0      600      600.0  2.95
## HasSwimmingPool            0.00      0.0        1        1.0  0.60
##                      kurtosis       se
## CityName*               -0.88     0.10
## Population              -1.08 37019.65
## CityRank                -0.76     0.12
## IsMetroCity             -1.08     0.00
## IsTouristDestination    -1.26     0.00
## IsWeekend               -1.74     0.00
## IsNewYearEve             3.18     0.00
## Date*                    1.92     0.02
## HotelName*              -1.25     4.24
## RoomRent               582.06    63.75
## StarRating               0.25     0.01
## Airport                  7.89     0.20
## HotelAddress*           -0.88     5.06
## HotelPincode           249.76  2258.86
## HotelDescription*       -1.25     3.16
## FreeWifi                 8.57     0.00
## FreeBreakfast           -1.61     0.00
## HotelCapacity           11.39     0.67
## HasSwimmingPool         -1.64     0.00
# Checking the dimensions of the dataset (number of rows, number of columns)
dim(Cities.df)
## [1] 13232    19
# Summarizing the data (this repeats the describe() call made right after the data is read)
library(psych)
describe(Cities.df)
##                      vars     n       mean         sd  median    trimmed
## CityName*               1 13232      18.07      11.72      16      17.29
## Population              2 13232 4416836.87 4258386.00 3046163 4040816.22
## CityRank                3 13232      14.83      13.51       9      13.30
## IsMetroCity             4 13232       0.28       0.45       0       0.23
## IsTouristDestination    5 13232       0.70       0.46       1       0.75
## IsWeekend               6 13232       0.62       0.48       1       0.65
## IsNewYearEve            7 13232       0.12       0.33       0       0.03
## Date*                   8 13232      14.30       2.69      14      14.39
## HotelName*              9 13232     841.19     488.16     827     841.18
## RoomRent               10 13232    5473.99    7333.12    4000    4383.33
## StarRating             11 13232       3.46       0.76       3       3.40
## Airport                12 13232      21.16      22.76      15      16.39
## HotelAddress*          13 13232    1202.53     582.17    1261    1233.25
## HotelPincode           14 13232  397430.26  259837.50  395003  388540.47
## HotelDescription*      15 13224     581.34     363.26     567     575.37
## FreeWifi               16 13232       0.93       0.26       1       1.00
## FreeBreakfast          17 13232       0.65       0.48       1       0.69
## HotelCapacity          18 13232      62.51      76.66      34      46.03
## HasSwimmingPool        19 13232       0.36       0.48       0       0.32
##                             mad      min      max      range  skew
## CityName*                 11.86      1.0       42       41.0  0.48
## Population           3846498.95   8096.0 12442373 12434277.0  0.68
## CityRank                  11.86      0.0       44       44.0  0.69
## IsMetroCity                0.00      0.0        1        1.0  0.96
## IsTouristDestination       0.00      0.0        1        1.0 -0.86
## IsWeekend                  0.00      0.0        1        1.0 -0.51
## IsNewYearEve               0.00      0.0        1        1.0  2.28
## Date*                      2.97      1.0       20       19.0 -0.77
## HotelName*               641.97      1.0     1670     1669.0  0.01
## RoomRent                2653.85    299.0   322500   322201.0 16.75
## StarRating                 0.74      0.0        5        5.0  0.48
## Airport                   11.12      0.2      124      123.8  2.73
## HotelAddress*            668.65      1.0     2108     2107.0 -0.37
## HotelPincode          257975.37 100025.0  7000157  6900132.0  9.99
## HotelDescription*        472.95      1.0     1226     1225.0  0.11
## FreeWifi                   0.00      0.0        1        1.0 -3.25
## FreeBreakfast              0.00      0.0        1        1.0 -0.62
## HotelCapacity             28.17      0.0      600      600.0  2.95
## HasSwimmingPool            0.00      0.0        1        1.0  0.60
##                      kurtosis       se
## CityName*               -0.88     0.10
## Population              -1.08 37019.65
## CityRank                -0.76     0.12
## IsMetroCity             -1.08     0.00
## IsTouristDestination    -1.26     0.00
## IsWeekend               -1.74     0.00
## IsNewYearEve             3.18     0.00
## Date*                    1.92     0.02
## HotelName*              -1.25     4.24
## RoomRent               582.06    63.75
## StarRating               0.25     0.01
## Airport                  7.89     0.20
## HotelAddress*           -0.88     5.06
## HotelPincode           249.76  2258.86
## HotelDescription*       -1.25     3.16
## FreeWifi                 8.57     0.00
## FreeBreakfast           -1.61     0.00
## HotelCapacity           11.39     0.67
## HasSwimmingPool         -1.64     0.00
# Inspect the structure of the data frame: the type of each column and its first few values
str(Cities.df)
## 'data.frame':    13232 obs. of  19 variables:
##  $ CityName            : Factor w/ 42 levels "Agra","Ahmedabad",..: 26 26 26 26 26 26 26 26 26 26 ...
##  $ Population          : int  12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 ...
##  $ CityRank            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ IsMetroCity         : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ IsTouristDestination: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ IsWeekend           : int  1 0 1 1 0 1 0 1 1 0 ...
##  $ IsNewYearEve        : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ Date                : Factor w/ 20 levels "04-Jan-16","04-Jan-17",..: 11 12 13 14 15 16 17 18 11 12 ...
##  $ HotelName           : Factor w/ 1670 levels "14 Square Amanora",..: 1635 1635 1635 1635 1635 1635 1635 1635 1409 1409 ...
##  $ RoomRent            : int  12375 10250 9900 10350 12000 11475 11220 9225 6800 9350 ...
##  $ StarRating          : num  5 5 5 5 5 5 5 5 4 4 ...
##  $ Airport             : num  21 21 21 21 21 21 21 21 20 20 ...
##  $ HotelAddress        : Factor w/ 2108 levels " H.P. High Court Mall Road, Shimla",..: 925 928 930 933 935 937 940 941 699 746 ...
##  $ HotelPincode        : int  400005 400006 400007 400008 400009 400010 400011 400012 400039 400040 ...
##  $ HotelDescription    : Factor w/ 1226 levels "#NAME?","10 star hotel near Queensroad, Amritsar",..: 1030 1030 1030 1030 1030 1030 1030 1030 1006 1006 ...
##  $ FreeWifi            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ FreeBreakfast       : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ HotelCapacity       : int  287 287 287 287 287 287 287 287 28 28 ...
##  $ HasSwimmingPool     : int  1 1 1 1 1 1 1 1 0 0 ...
                          -------------pie representation-----------
# Pie chart of the number of records per city (one slice per CityName level).
# The colour vector is reused from the start when there are more slices than colours.
pie(
  table(Cities.df$CityName),
  main = "List of hotels by CityName.",
  col = c(
    "Violet", "blue", "green", "yellow", "red", "purple", "pink", "grey",
    "white", "black", "orange", "skyblue", "purple", "goldenrod1",
    "deeppink4", "mediumorchid1",
    "olivedrab", "olivedrab1", "olivedrab2", "olivedrab3", "olivedrab4",
    "orange1"
  )
)

# Pie chart of the number of records for each city rank.
# The title previously read "Airline type by country", which did not describe
# this chart; it now names the variable being plotted.
# The colour vector is reused from the start when there are more slices than colours.
pie(table(Cities.df$CityRank),
col=c("Violet","blue","green","yellow","red","purple","pink","grey","white","black","orange","skyblue","purple"),main="Hotels by city rank")

# Number of records for each city, printed as a table so the exact counts can be read.
table(Cities.df$CityName)
## 
##             Agra        Ahmedabad         Amritsar        Bangalore 
##              432              424              136              656 
##      Bhubaneswar       Chandigarh          Chennai       Darjeeling 
##              120              336              416              136 
##            Delhi          Gangtok              Goa         Guwahati 
##             2048              128              624               48 
##         Haridwar        Hyderabad           Indore           Jaipur 
##               48              536              160              768 
##        Jaisalmer          Jodhpur           Kanpur            Kochi 
##              264              224               16              608 
##          Kolkata          Lucknow          Madurai           Manali 
##              512              128              112              288 
##        Mangalore           Mumbai           Munnar           Mysore 
##              104              712              328              160 
##         Nainital             Ooty        Panchkula             Pune 
##              144              136               64              600 
##             Puri           Rajkot        Rishikesh           Shimla 
##               56              128               88              280 
##         Srinagar            Surat Thiruvanthipuram         Thrissur 
##               40               80              392               32 
##          Udaipur         Varanasi 
##              456              264
# Record counts for each binary indicator column (0 = FALSE, 1 = TRUE).

# Weekend flag
table(Cities.df[["IsWeekend"]])
## 
##    0    1 
## 4991 8241
# New Year's Eve flag
table(Cities.df[["IsNewYearEve"]])
## 
##     0     1 
## 11586  1646
# Free wifi flag
table(Cities.df[["FreeWifi"]])
## 
##     0     1 
##   981 12251
# Free breakfast flag
table(Cities.df[["FreeBreakfast"]])
## 
##    0    1 
## 4643 8589
# Swimming pool flag
table(Cities.df[["HasSwimmingPool"]])
## 
##    0    1 
## 8524 4708
# Mean, minimum and maximum of the main numeric columns.

# City rank (mean and minimum only)
with(Cities.df, mean(CityRank))
## [1] 14.83374
with(Cities.df, min(CityRank))
## [1] 0
# Population
with(Cities.df, mean(Population))
## [1] 4416837
with(Cities.df, min(Population))
## [1] 8096
with(Cities.df, max(Population))
## [1] 12442373
# Room rent
with(Cities.df, mean(RoomRent))
## [1] 5473.992
with(Cities.df, min(RoomRent))
## [1] 299
with(Cities.df, max(RoomRent))
## [1] 322500
# Star rating
with(Cities.df, mean(StarRating))
## [1] 3.458933
with(Cities.df, min(StarRating))
## [1] 0
with(Cities.df, max(StarRating))
## [1] 5
# Airport column
with(Cities.df, mean(Airport))
## [1] 21.15874
with(Cities.df, min(Airport))
## [1] 0.2
with(Cities.df, max(Airport))
## [1] 124
# Hotel capacity
with(Cities.df, mean(HotelCapacity))
## [1] 62.51164
with(Cities.df, min(HotelCapacity))
## [1] 0
with(Cities.df, max(HotelCapacity))
## [1] 600

One-way contingency tables

# One-way frequency tables. Each table() call counts the records per value of
# one column; prop.table(...) * 100 turns those counts into percentages of all records.

# Star rating: counts, then percentages
mytable <- with(Cities.df, table(StarRating))
mytable
## StarRating
##    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9    4  4.1 
##   16    8  440  632 5953    8   16    8 1752    8   24   16   32 2463   24 
##  4.3  4.4  4.5  4.7  4.8    5 
##   16    8  376    8   16 1408
prop.table(mytable)*100
## StarRating
##           0           1           2         2.5           3         3.2 
##  0.12091898  0.06045949  3.32527207  4.77629988 44.98941959  0.06045949 
##         3.3         3.4         3.5         3.6         3.7         3.8 
##  0.12091898  0.06045949 13.24062878  0.06045949  0.18137848  0.12091898 
##         3.9           4         4.1         4.3         4.4         4.5 
##  0.24183797 18.61396614  0.18137848  0.12091898  0.06045949  2.84159613 
##         4.7         4.8           5 
##  0.06045949  0.12091898 10.64087062
# Metro city flag: counts, then percentages
mytable1 <- with(Cities.df, table(IsMetroCity))
mytable1
## IsMetroCity
##    0    1 
## 9472 3760
prop.table(mytable1)*100
## IsMetroCity
##        0        1 
## 71.58404 28.41596
# Free breakfast flag: counts, then percentages
mytable2 <- with(Cities.df, table(FreeBreakfast))
mytable2
## FreeBreakfast
##    0    1 
## 4643 8589
prop.table(mytable2)*100
## FreeBreakfast
##        0        1 
## 35.08918 64.91082
# City rank: counts, then percentages
mytable3 <- with(Cities.df, table(CityRank))
mytable3
## CityRank
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
##  712 2048  656  416  536  424  512   80  600  768   32  128   16  136  160 
##   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
##  432  448  624  128  264   40  224  336  392   48  160  120  272  104  456 
##   32   33   34   35   36   37   38   39   40   42   43   44 
##   48   56  280   64  136   88  128  136  264  144  328  288
prop.table(mytable3)*100
## CityRank
##          0          1          2          3          4          5 
##  5.3808948 15.4776300  4.9576784  3.1438936  4.0507860  3.2043531 
##          6          7          8          9         10         11 
##  3.8694075  0.6045949  4.5344619  5.8041112  0.2418380  0.9673519 
##         12         13         14         16         17         18 
##  0.1209190  1.0278114  1.2091898  3.2648126  3.3857316  4.7158404 
##         19         20         21         22         23         24 
##  0.9673519  1.9951632  0.3022975  1.6928658  2.5392987  2.9625151 
##         25         26         27         28         29         30 
##  0.3627570  1.2091898  0.9068924  2.0556227  0.7859734  3.4461911 
##         32         33         34         35         36         37 
##  0.3627570  0.4232164  2.1160822  0.4836759  1.0278114  0.6650544 
##         38         39         40         42         43         44 
##  0.9673519  1.0278114  1.9951632  1.0882709  2.4788392  2.1765417
# Tourist destination flag: counts, then percentages
mytable4 <- with(Cities.df, table(IsTouristDestination))
mytable4
## IsTouristDestination
##    0    1 
## 4007 9225
prop.table(mytable4)*100
## IsTouristDestination
##        0        1 
## 30.28265 69.71735

Two-way contingency tables

# Cross-tabulation of FreeBreakfast (rows) by StarRating (columns).
# Note: this reassigns mytable, replacing the one-way table stored under that name earlier.
mytable <- xtabs(~ FreeBreakfast+StarRating, data=Cities.df)
mytable
##              StarRating
## FreeBreakfast    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8
##             0   16    0  216  296 1789    0    8    0  661    8    0    8
##             1    0    8  224  336 4164    8    8    8 1091    0   24    8
##              StarRating
## FreeBreakfast  3.9    4  4.1  4.3  4.4  4.5  4.7  4.8    5
##             0   16  783    0   16    0  224    8    0  594
##             1   16 1680   24    0    8  152    0   16  814
# Each cell as a percentage of all records (no margin given, so the whole table sums to 100)
prop.table(mytable)*100
##              StarRating
## FreeBreakfast           0           1           2         2.5           3
##             0  0.12091898  0.00000000  1.63240629  2.23700121 13.52025393
##             1  0.00000000  0.06045949  1.69286578  2.53929867 31.46916566
##              StarRating
## FreeBreakfast         3.2         3.3         3.4         3.5         3.6
##             0  0.00000000  0.06045949  0.00000000  4.99546554  0.06045949
##             1  0.06045949  0.06045949  0.06045949  8.24516324  0.00000000
##              StarRating
## FreeBreakfast         3.7         3.8         3.9           4         4.1
##             0  0.00000000  0.06045949  0.12091898  5.91747279  0.00000000
##             1  0.18137848  0.06045949  0.12091898 12.69649335  0.18137848
##              StarRating
## FreeBreakfast         4.3         4.4         4.5         4.7         4.8
##             0  0.12091898  0.00000000  1.69286578  0.06045949  0.00000000
##             1  0.00000000  0.06045949  1.14873035  0.00000000  0.12091898
##              StarRating
## FreeBreakfast           5
##             0  4.48911729
##             1  6.15175333
# Row totals (records per FreeBreakfast value)
margin.table(mytable,1)
## FreeBreakfast
##    0    1 
## 4643 8589
# Column totals (records per StarRating value)
margin.table(mytable,2)
## StarRating
##    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9    4  4.1 
##   16    8  440  632 5953    8   16    8 1752    8   24   16   32 2463   24 
##  4.3  4.4  4.5  4.7  4.8    5 
##   16    8  376    8   16 1408
# Cross-tabulation of IsMetroCity (rows) by StarRating (columns).
# Note: this reassigns mytable1, replacing the one-way table stored under that name earlier.
mytable1 <- xtabs(~ IsMetroCity+StarRating, data=Cities.df)
mytable1
##            StarRating
## IsMetroCity    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8
##           0   16    8  344  456 4336    8   16    8 1312    0   24   16
##           1    0    0   96  176 1617    0    0    0  440    8    0    0
##            StarRating
## IsMetroCity  3.9    4  4.1  4.3  4.4  4.5  4.7  4.8    5
##           0   32 1696   24   16    8  288    8   16  840
##           1    0  767    0    0    0   88    0    0  568
# Each cell as a percentage of all records (no margin given, so the whole table sums to 100)
prop.table(mytable1)*100
##            StarRating
## IsMetroCity           0           1           2         2.5           3
##           0  0.12091898  0.06045949  2.59975816  3.44619105 32.76904474
##           1  0.00000000  0.00000000  0.72551391  1.33010883 12.22037485
##            StarRating
## IsMetroCity         3.2         3.3         3.4         3.5         3.6
##           0  0.06045949  0.12091898  0.06045949  9.91535671  0.00000000
##           1  0.00000000  0.00000000  0.00000000  3.32527207  0.06045949
##            StarRating
## IsMetroCity         3.7         3.8         3.9           4         4.1
##           0  0.18137848  0.12091898  0.24183797 12.81741233  0.18137848
##           1  0.00000000  0.00000000  0.00000000  5.79655381  0.00000000
##            StarRating
## IsMetroCity         4.3         4.4         4.5         4.7         4.8
##           0  0.12091898  0.06045949  2.17654172  0.06045949  0.12091898
##           1  0.00000000  0.00000000  0.66505441  0.00000000  0.00000000
##            StarRating
## IsMetroCity           5
##           0  6.34824667
##           1  4.29262394
# Row totals (records per IsMetroCity value)
margin.table(mytable1,1)
## IsMetroCity
##    0    1 
## 9472 3760
# Column totals (records per StarRating value)
margin.table(mytable1,2)
## StarRating
##    0    1    2  2.5    3  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9    4  4.1 
##   16    8  440  632 5953    8   16    8 1752    8   24   16   32 2463   24 
##  4.3  4.4  4.5  4.7  4.8    5 
##   16    8  376    8   16 1408
# Cross-tabulation of IsMetroCity (rows) by IsTouristDestination (columns).
# Note: this reassigns mytable2, replacing the one-way table stored under that name earlier.
mytable2 <- xtabs(~ IsMetroCity+IsTouristDestination, data=Cities.df)
mytable2
##            IsTouristDestination
## IsMetroCity    0    1
##           0 3352 6120
##           1  655 3105
# Each cell as a percentage of all records
prop.table(mytable2)*100
##            IsTouristDestination
## IsMetroCity         0         1
##           0 25.332527 46.251511
##           1  4.950121 23.465840
# Row totals (records per IsMetroCity value)
margin.table(mytable2,1)
## IsMetroCity
##    0    1 
## 9472 3760
# Column totals (records per IsTouristDestination value)
margin.table(mytable2,2)
## IsTouristDestination
##    0    1 
## 4007 9225
# Cross-tabulation of FreeWifi (rows) by FreeBreakfast (columns).
# Note: this reassigns mytable3, replacing the one-way table stored under that name earlier.
mytable3 <- xtabs(~ FreeWifi+FreeBreakfast, data=Cities.df)
mytable3
##         FreeBreakfast
## FreeWifi    0    1
##        0  606  375
##        1 4037 8214
# Each cell as a percentage of all records
prop.table(mytable3)*100
##         FreeBreakfast
## FreeWifi         0         1
##        0  4.579807  2.834039
##        1 30.509371 62.076784
# Row totals (records per FreeWifi value)
margin.table(mytable3,1)
## FreeWifi
##     0     1 
##   981 12251
# Column totals (records per FreeBreakfast value)
margin.table(mytable3,2)
## FreeBreakfast
##    0    1 
## 4643 8589

Boxplots as per Analysis.

# Horizontal box plots: two single-variable plots, then three grouped plots.

# Distribution of city rank
boxplot(
  Cities.df$CityRank,
  main = "Rank of the cities",
  col = "lightblue",
  horizontal = TRUE
)

# Distribution of population
boxplot(
  Cities.df$Population,
  main = "Population",
  col = "orange",
  horizontal = TRUE
)

# Room rent split by the weekend flag
boxplot(
  RoomRent ~ IsWeekend,
  data = Cities.df,
  main = "Analysis of Roomrent and Isweekend avalability",
  xlab = "RoomRents",
  ylab = "IsWeekend(notWeekend=0,weekend=1)",
  col = c("black", "white"),
  las = 1,
  horizontal = TRUE
)

# Room rent split by the metro-city flag
boxplot(
  RoomRent ~ IsMetroCity,
  data = Cities.df,
  main = "Analysis of type of city and room rent of hotels",
  xlab = "Room rent",
  ylab = "City(metro=1,other=0)",
  col = c("red", "green"),
  las = 1,
  horizontal = TRUE
)

# Star rating split by the free-breakfast flag
boxplot(
  StarRating ~ FreeBreakfast,
  data = Cities.df,
  main = "Analysis of star rating and breakfast avalability",
  xlab = "Star ratings",
  ylab = "breakfast avalability",
  col = c("pink", "yellow"),
  las = 1,
  horizontal = TRUE
)

Histograms as per Analysis.

# Histograms of the main numeric columns.

hist(Cities.df$Population, main = "Population", xlab = "Population", col = "skyblue")

# With freq = FALSE, hist() plots densities (bar areas sum to 1), not relative
# frequencies, so the y axis is labelled "Density".
hist(Cities.df$RoomRent, main = "Analysis of room rents of hotels",
     xlab = "Rents of room", ylab = "Density", breaks = 30,
     col = "lightblue", freq = FALSE)

hist(Cities.df$StarRating, main = "Analysis of star ratings of hotels",
     xlab = "Star ratings", ylab = "Density", breaks = 30,
     col = "yellow", freq = FALSE)

# Same Population histogram as the first one, drawn again in a different colour.
hist(Cities.df$Population, main = "Population", xlab = "Population", col = "peachpuff")

hist(Cities.df$Airport, main = "Distance to nearest major airport",
     xlab = "Distance to nearest major Airport in km", col = "red")

hist(Cities.df$HotelCapacity, main = "Capacity of hotels",
     xlab = "Hotel Capacity", col = "black")

hist(Cities.df$CityRank, main = "Distribution of rank of cities",
     xlab = "City rank", col = "skyblue")

# Scatter plots drawn with car::scatterplot(). The package is attached once here;
# attaching an already attached package again has no further effect.
# NOTE(review): spread= and smoother.args= look like the pre-3.0 car interface;
# newer car releases appear to take smoother settings through smooth = list(...) —
# confirm against the installed car version.
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit

# Room rent against star rating
scatterplot(
  RoomRent ~ StarRating,
  data = Cities.df,
  main = "Scatter plot of Star Rating vs Room rent",
  xlab = "Star Rating",
  ylab = "Room Rent",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# Room rent against city rank
scatterplot(
  RoomRent ~ CityRank,
  data = Cities.df,
  main = "Scatter plot of City rank vs Room rent",
  xlab = "Rank of City",
  ylab = "Room Rent",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# Room rent against the swimming-pool flag
scatterplot(
  RoomRent ~ HasSwimmingPool,
  data = Cities.df,
  main = "Scatter plot of hotel with Swimming pool vs Room rent",
  xlab = "Hotel with swimming pool",
  ylab = "Room Rent",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# City rank against population (x/y vectors instead of a formula)
scatterplot(
  x = Cities.df$Population,
  y = Cities.df$CityRank,
  main = "Population Vs City Rank ",
  xlab = "Population",
  ylab = "City rank"
)

# Corrgram of the data frame: panel.shade below the diagonal, panel.pie above it,
# and panel.txt as the text panel.
library(corrgram)
corrgram(
  Cities.df,
  main = "Corrgram of all  variables",
  text.panel = panel.txt,
  upper.panel = panel.pie,
  lower.panel = panel.shade
)

Correlation matrix

# Pairwise Pearson correlations (the cor() default) between selected numeric columns.
# Columns are selected by name instead of by position (2, 3, 4, 5, 6, 7, 10, 11, 18),
# so the selection stays correct if the CSV column order ever changes and a
# missing column raises an error instead of silently picking a different one.
major <- Cities.df[, c(
  "Population", "CityRank", "IsMetroCity", "IsTouristDestination",
  "IsWeekend", "IsNewYearEve", "RoomRent", "StarRating", "HotelCapacity"
)]
cor(major)
##                         Population      CityRank   IsMetroCity
## Population            1.0000000000 -0.8353204432  0.7712260105
## CityRank             -0.8353204432  1.0000000000 -0.5643937903
## IsMetroCity           0.7712260105 -0.5643937903  1.0000000000
## IsTouristDestination -0.0482029722  0.2807134520  0.1763717063
## IsWeekend             0.0115926802 -0.0072564766  0.0018118005
## IsNewYearEve          0.0007332482 -0.0006326444  0.0006464753
## RoomRent             -0.0887280632  0.0939855292 -0.0668397705
## StarRating            0.1341365933 -0.1333810133  0.0776028661
## HotelCapacity         0.2599830516 -0.2561197059  0.1871502153
##                      IsTouristDestination    IsWeekend  IsNewYearEve
## Population                   -0.048202972  0.011592680  0.0007332482
## CityRank                      0.280713452 -0.007256477 -0.0006326444
## IsMetroCity                   0.176371706  0.001811801  0.0006464753
## IsTouristDestination          1.000000000 -0.019481101 -0.0022663884
## IsWeekend                    -0.019481101  1.000000000  0.2923820508
## IsNewYearEve                 -0.002266388  0.292382051  1.0000000000
## RoomRent                      0.122502963  0.004580134  0.0384912269
## StarRating                   -0.040554998  0.006378436  0.0023608970
## HotelCapacity                -0.094356091  0.006306507  0.0013526790
##                          RoomRent   StarRating HotelCapacity
## Population           -0.088728063  0.134136593   0.259983052
## CityRank              0.093985529 -0.133381013  -0.256119706
## IsMetroCity          -0.066839771  0.077602866   0.187150215
## IsTouristDestination  0.122502963 -0.040554998  -0.094356091
## IsWeekend             0.004580134  0.006378436   0.006306507
## IsNewYearEve          0.038491227  0.002360897   0.001352679
## RoomRent              1.000000000  0.369373425   0.157873308
## StarRating            0.369373425  1.000000000   0.637430337
## HotelCapacity         0.157873308  0.637430337   1.000000000

Correlation tests.

# Correlation tests of RoomRent against four other columns. Each call prints the
# correlation estimate, its 95% confidence interval and the p-value for the
# null hypothesis that the true correlation is 0.

# Room rent vs star rating
cor.test(Cities.df$RoomRent, Cities.df$StarRating)
## 
##  Pearson's product-moment correlation
## 
## data:  Cities.df$RoomRent and Cities.df$StarRating
## t = 45.719, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3545660 0.3839956
## sample estimates:
##       cor 
## 0.3693734
# Room rent vs metro-city flag
cor.test(Cities.df$RoomRent, Cities.df$IsMetroCity)
## 
##  Pearson's product-moment correlation
## 
## data:  Cities.df$RoomRent and Cities.df$IsMetroCity
## t = -7.7053, df = 13230, p-value = 1.399e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08378329 -0.04985761
## sample estimates:
##         cor 
## -0.06683977
# Room rent vs city rank
cor.test(Cities.df$RoomRent, Cities.df$CityRank)
## 
##  Pearson's product-moment correlation
## 
## data:  Cities.df$RoomRent and Cities.df$CityRank
## t = 10.858, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.07707001 0.11084696
## sample estimates:
##        cor 
## 0.09398553
# Room rent vs New Year's Eve flag
cor.test(Cities.df$RoomRent, Cities.df$IsNewYearEve)
## 
##  Pearson's product-moment correlation
## 
## data:  Cities.df$RoomRent and Cities.df$IsNewYearEve
## t = 4.4306, df = 13230, p-value = 9.472e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.02146637 0.05549377
## sample estimates:
##        cor 
## 0.03849123

T-Tests

       # Null hypothesis: there is no difference between the room rent on weekdays and on weekends.
# Two-sample t-test of RoomRent between the IsWeekend groups (0 and 1).
t.test(Cities.df$RoomRent ~ Cities.df$IsWeekend)
## 
##  Welch Two Sample t-test
## 
## data:  Cities.df$RoomRent by Cities.df$IsWeekend
## t = -0.51853, df = 9999.4, p-value = 0.6041
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -331.2427  192.6559
## sample estimates:
## mean in group 0 mean in group 1 
##        5430.835        5500.129

As we can see, the p-value is 0.60 (> 0.05), so we fail to reject the null hypothesis.

This means there is no statistically significant difference between the room rents on weekdays and on weekends.

# Null hypothesis: there is no difference between the room rent on New Year's Eve and on other days.
# Two-sample t-test of RoomRent between the IsNewYearEve groups (0 and 1).
t.test(Cities.df$RoomRent ~ Cities.df$IsNewYearEve)
## 
##  Welch Two Sample t-test
## 
## data:  Cities.df$RoomRent by Cities.df$IsNewYearEve
## t = -4.1793, df = 2065, p-value = 3.046e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1256.5297  -453.9099
## sample estimates:
## mean in group 0 mean in group 1 
##        5367.606        6222.826

The p-value is 3.046e-05 (< 0.05), which is small enough to reject the null hypothesis.

Hence there is a significant difference between the room rent on New Year’s Eve and on other days: the mean rent is higher on New Year’s Eve (about 6223 versus 5368).

# Null hypothesis: there is no difference between the room rent in metro cities and in other cities.
# Two-sample t-test of RoomRent between the IsMetroCity groups (0 and 1).
t.test(Cities.df$RoomRent ~ Cities.df$IsMetroCity)
## 
##  Welch Two Sample t-test
## 
## data:  Cities.df$RoomRent by Cities.df$IsMetroCity
## t = 10.721, df = 13224, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   888.0308 1285.4102
## sample estimates:
## mean in group 0 mean in group 1 
##        5782.794        4696.073

The p-value is below 2.2e-16 (< 0.05), which is small enough to reject the null hypothesis.

Hence there is a significant difference between the room rent in metro cities and in other cities: the mean rent is lower in metro cities (about 4696 versus 5783).

# Null hypothesis: there is no difference between the room rent where wifi is free and where it is not.
# Two-sample t-test of RoomRent between the FreeWifi groups (0 and 1).
t.test(Cities.df$RoomRent ~ Cities.df$FreeWifi)
## 
##  Welch Two Sample t-test
## 
## data:  Cities.df$RoomRent by Cities.df$FreeWifi
## t = -0.76847, df = 1804.7, p-value = 0.4423
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -360.5977  157.5701
## sample estimates:
## mean in group 0 mean in group 1 
##        5380.004        5481.518

The p-value is 0.44 (> 0.05), so we fail to reject the null hypothesis.

This shows that there is no statistically significant difference between the room rent where wifi is free and where it is not.

# Null hypothesis: there is no difference in mean room rent between rooms with and without free breakfast.
# Two-sample t-test of RoomRent between the FreeBreakfast groups (0 and 1).
t.test(Cities.df$RoomRent ~ Cities.df$FreeBreakfast)
## 
##  Welch Two Sample t-test
## 
## data:  Cities.df$RoomRent by Cities.df$FreeBreakfast
## t = 0.98095, df = 6212.3, p-value = 0.3267
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -153.5017  460.9935
## sample estimates:
## mean in group 0 mean in group 1 
##        5573.790        5420.044

The difference between the two means is not statistically significant (p-value = 0.33 > 0.05), so we fail to reject the null hypothesis.

This means there is no evidence that room rents differ depending on whether free breakfast is available; it does not prove that the rents are identical.