# ------------ Pricing strategy of hotels in the Indian hotel industry ------------
# Read the hotel room pricing data.
# `stringsAsFactors = TRUE` keeps text columns as factors on every R version
# (the read.csv default changed to FALSE in R 4.0.0); the str() output
# recorded further down shows these columns as factors.
Cities.df <- read.csv("Cities42.csv", stringsAsFactors = TRUE)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(Cities.df)
}
# Summarizing the data
library(psych)
describe(Cities.df)
## vars n mean sd median trimmed
## CityName* 1 13232 18.07 11.72 16 17.29
## Population 2 13232 4416836.87 4258386.00 3046163 4040816.22
## CityRank 3 13232 14.83 13.51 9 13.30
## IsMetroCity 4 13232 0.28 0.45 0 0.23
## IsTouristDestination 5 13232 0.70 0.46 1 0.75
## IsWeekend 6 13232 0.62 0.48 1 0.65
## IsNewYearEve 7 13232 0.12 0.33 0 0.03
## Date* 8 13232 14.30 2.69 14 14.39
## HotelName* 9 13232 841.19 488.16 827 841.18
## RoomRent 10 13232 5473.99 7333.12 4000 4383.33
## StarRating 11 13232 3.46 0.76 3 3.40
## Airport 12 13232 21.16 22.76 15 16.39
## HotelAddress* 13 13232 1202.53 582.17 1261 1233.25
## HotelPincode 14 13232 397430.26 259837.50 395003 388540.47
## HotelDescription* 15 13224 581.34 363.26 567 575.37
## FreeWifi 16 13232 0.93 0.26 1 1.00
## FreeBreakfast 17 13232 0.65 0.48 1 0.69
## HotelCapacity 18 13232 62.51 76.66 34 46.03
## HasSwimmingPool 19 13232 0.36 0.48 0 0.32
## mad min max range skew
## CityName* 11.86 1.0 42 41.0 0.48
## Population 3846498.95 8096.0 12442373 12434277.0 0.68
## CityRank 11.86 0.0 44 44.0 0.69
## IsMetroCity 0.00 0.0 1 1.0 0.96
## IsTouristDestination 0.00 0.0 1 1.0 -0.86
## IsWeekend 0.00 0.0 1 1.0 -0.51
## IsNewYearEve 0.00 0.0 1 1.0 2.28
## Date* 2.97 1.0 20 19.0 -0.77
## HotelName* 641.97 1.0 1670 1669.0 0.01
## RoomRent 2653.85 299.0 322500 322201.0 16.75
## StarRating 0.74 0.0 5 5.0 0.48
## Airport 11.12 0.2 124 123.8 2.73
## HotelAddress* 668.65 1.0 2108 2107.0 -0.37
## HotelPincode 257975.37 100025.0 7000157 6900132.0 9.99
## HotelDescription* 472.95 1.0 1226 1225.0 0.11
## FreeWifi 0.00 0.0 1 1.0 -3.25
## FreeBreakfast 0.00 0.0 1 1.0 -0.62
## HotelCapacity 28.17 0.0 600 600.0 2.95
## HasSwimmingPool 0.00 0.0 1 1.0 0.60
## kurtosis se
## CityName* -0.88 0.10
## Population -1.08 37019.65
## CityRank -0.76 0.12
## IsMetroCity -1.08 0.00
## IsTouristDestination -1.26 0.00
## IsWeekend -1.74 0.00
## IsNewYearEve 3.18 0.00
## Date* 1.92 0.02
## HotelName* -1.25 4.24
## RoomRent 582.06 63.75
## StarRating 0.25 0.01
## Airport 7.89 0.20
## HotelAddress* -0.88 5.06
## HotelPincode 249.76 2258.86
## HotelDescription* -1.25 3.16
## FreeWifi 8.57 0.00
## FreeBreakfast -1.61 0.00
## HotelCapacity 11.39 0.67
## HasSwimmingPool -1.64 0.00
# Checking the dimensions of the dataset (rows, columns).
# The describe() summary is already produced once right after loading the
# data, so it is not repeated here.
dim(Cities.df)
## [1] 13232 19
# Show the storage type of every column together with a few sample values.
utils::str(object = Cities.df)
## 'data.frame': 13232 obs. of 19 variables:
## $ CityName : Factor w/ 42 levels "Agra","Ahmedabad",..: 26 26 26 26 26 26 26 26 26 26 ...
## $ Population : int 12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 12442373 ...
## $ CityRank : int 0 0 0 0 0 0 0 0 0 0 ...
## $ IsMetroCity : int 1 1 1 1 1 1 1 1 1 1 ...
## $ IsTouristDestination: int 1 1 1 1 1 1 1 1 1 1 ...
## $ IsWeekend : int 1 0 1 1 0 1 0 1 1 0 ...
## $ IsNewYearEve : int 0 0 0 0 0 1 0 0 0 0 ...
## $ Date : Factor w/ 20 levels "04-Jan-16","04-Jan-17",..: 11 12 13 14 15 16 17 18 11 12 ...
## $ HotelName : Factor w/ 1670 levels "14 Square Amanora",..: 1635 1635 1635 1635 1635 1635 1635 1635 1409 1409 ...
## $ RoomRent : int 12375 10250 9900 10350 12000 11475 11220 9225 6800 9350 ...
## $ StarRating : num 5 5 5 5 5 5 5 5 4 4 ...
## $ Airport : num 21 21 21 21 21 21 21 21 20 20 ...
## $ HotelAddress : Factor w/ 2108 levels " H.P. High Court Mall Road, Shimla",..: 925 928 930 933 935 937 940 941 699 746 ...
## $ HotelPincode : int 400005 400006 400007 400008 400009 400010 400011 400012 400039 400040 ...
## $ HotelDescription : Factor w/ 1226 levels "#NAME?","10 star hotel near Queensroad, Amritsar",..: 1030 1030 1030 1030 1030 1030 1030 1030 1006 1006 ...
## $ FreeWifi : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FreeBreakfast : int 0 0 0 0 0 0 0 0 1 1 ...
## $ HotelCapacity : int 287 287 287 287 287 287 287 287 28 28 ...
## $ HasSwimmingPool : int 1 1 1 1 1 1 1 1 0 0 ...
# ------------- Pie chart representation -------------
# Pie chart of the number of records per city.
# One colour is generated per slice: a fixed palette shorter than the number
# of cities is recycled by pie(), which makes different cities share a colour.
# local() keeps the helper variable out of the global workspace.
local({
  city_counts <- table(Cities.df$CityName)
  pie(city_counts,
      col = rainbow(length(city_counts)),
      main = "List of hotels by CityName.")
})
# Pie chart of the number of records per city rank.
# The title names what is actually plotted (city rank), and one colour is
# generated per slice so that no two ranks share a colour through recycling.
# local() keeps the helper variable out of the global workspace.
local({
  rank_counts <- table(Cities.df$CityRank)
  pie(rank_counts,
      col = rainbow(length(rank_counts)),
      main = "Hotels by city rank")
})
# Frequency table: number of records for each city.
table(Cities.df[["CityName"]])
##
## Agra Ahmedabad Amritsar Bangalore
## 432 424 136 656
## Bhubaneswar Chandigarh Chennai Darjeeling
## 120 336 416 136
## Delhi Gangtok Goa Guwahati
## 2048 128 624 48
## Haridwar Hyderabad Indore Jaipur
## 48 536 160 768
## Jaisalmer Jodhpur Kanpur Kochi
## 264 224 16 608
## Kolkata Lucknow Madurai Manali
## 512 128 112 288
## Mangalore Mumbai Munnar Mysore
## 104 712 328 160
## Nainital Ooty Panchkula Pune
## 144 136 64 600
## Puri Rajkot Rishikesh Shimla
## 56 128 88 280
## Srinagar Surat Thiruvanthipuram Thrissur
## 40 80 392 32
## Udaipur Varanasi
## 456 264
# Frequency of weekend vs. non-weekend records.
table(Cities.df[["IsWeekend"]]) # 0 = FALSE, 1 = TRUE
##
## 0 1
## 4991 8241
# Frequency of New Year's Eve vs. other records.
table(Cities.df[["IsNewYearEve"]]) # 0 = FALSE, 1 = TRUE
##
## 0 1
## 11586 1646
# Frequency of records with and without free Wi-Fi.
table(Cities.df[["FreeWifi"]]) # 0 = FALSE, 1 = TRUE
##
## 0 1
## 981 12251
# Frequency of records with and without free breakfast.
table(Cities.df[["FreeBreakfast"]]) # 0 = FALSE, 1 = TRUE
##
## 0 1
## 4643 8589
# Frequency of records with and without a swimming pool.
table(Cities.df[["HasSwimmingPool"]]) # 0 = FALSE, 1 = TRUE
##
## 0 1
## 8524 4708
# Mean and extreme values of the main numeric columns.
# City rank
with(Cities.df, mean(CityRank))
## [1] 14.83374
with(Cities.df, min(CityRank))
## [1] 0
# Population
with(Cities.df, mean(Population))
## [1] 4416837
with(Cities.df, min(Population))
## [1] 8096
with(Cities.df, max(Population))
## [1] 12442373
# Room rent
with(Cities.df, mean(RoomRent))
## [1] 5473.992
with(Cities.df, min(RoomRent))
## [1] 299
with(Cities.df, max(RoomRent))
## [1] 322500
# Star rating
with(Cities.df, mean(StarRating))
## [1] 3.458933
with(Cities.df, min(StarRating))
## [1] 0
with(Cities.df, max(StarRating))
## [1] 5
# Airport column (distance to the nearest major airport, per the histogram label)
with(Cities.df, mean(Airport))
## [1] 21.15874
with(Cities.df, min(Airport))
## [1] 0.2
with(Cities.df, max(Airport))
## [1] 124
# Hotel capacity
with(Cities.df, mean(HotelCapacity))
## [1] 62.51164
with(Cities.df, min(HotelCapacity))
## [1] 0
with(Cities.df, max(HotelCapacity))
## [1] 600
# One-way frequency tables, each followed by the same table as percentages
# of all records (counts divided by the grand total, times 100).

# Star rating
mytable <- table(StarRating = Cities.df$StarRating)
mytable
## StarRating
## 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4 4.1
## 16 8 440 632 5953 8 16 8 1752 8 24 16 32 2463 24
## 4.3 4.4 4.5 4.7 4.8 5
## 16 8 376 8 16 1408
mytable / sum(mytable) * 100
## StarRating
## 0 1 2 2.5 3 3.2
## 0.12091898 0.06045949 3.32527207 4.77629988 44.98941959 0.06045949
## 3.3 3.4 3.5 3.6 3.7 3.8
## 0.12091898 0.06045949 13.24062878 0.06045949 0.18137848 0.12091898
## 3.9 4 4.1 4.3 4.4 4.5
## 0.24183797 18.61396614 0.18137848 0.12091898 0.06045949 2.84159613
## 4.7 4.8 5
## 0.06045949 0.12091898 10.64087062

# Metro city flag
mytable1 <- table(IsMetroCity = Cities.df$IsMetroCity)
mytable1
## IsMetroCity
## 0 1
## 9472 3760
mytable1 / sum(mytable1) * 100
## IsMetroCity
## 0 1
## 71.58404 28.41596

# Free breakfast flag
mytable2 <- table(FreeBreakfast = Cities.df$FreeBreakfast)
mytable2
## FreeBreakfast
## 0 1
## 4643 8589
mytable2 / sum(mytable2) * 100
## FreeBreakfast
## 0 1
## 35.08918 64.91082

# City rank
mytable3 <- table(CityRank = Cities.df$CityRank)
mytable3
## CityRank
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 712 2048 656 416 536 424 512 80 600 768 32 128 16 136 160
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## 432 448 624 128 264 40 224 336 392 48 160 120 272 104 456
## 32 33 34 35 36 37 38 39 40 42 43 44
## 48 56 280 64 136 88 128 136 264 144 328 288
mytable3 / sum(mytable3) * 100
## CityRank
## 0 1 2 3 4 5
## 5.3808948 15.4776300 4.9576784 3.1438936 4.0507860 3.2043531
## 6 7 8 9 10 11
## 3.8694075 0.6045949 4.5344619 5.8041112 0.2418380 0.9673519
## 12 13 14 16 17 18
## 0.1209190 1.0278114 1.2091898 3.2648126 3.3857316 4.7158404
## 19 20 21 22 23 24
## 0.9673519 1.9951632 0.3022975 1.6928658 2.5392987 2.9625151
## 25 26 27 28 29 30
## 0.3627570 1.2091898 0.9068924 2.0556227 0.7859734 3.4461911
## 32 33 34 35 36 37
## 0.3627570 0.4232164 2.1160822 0.4836759 1.0278114 0.6650544
## 38 39 40 42 43 44
## 0.9673519 1.0278114 1.9951632 1.0882709 2.4788392 2.1765417

# Tourist destination flag
mytable4 <- table(IsTouristDestination = Cities.df$IsTouristDestination)
mytable4
## IsTouristDestination
## 0 1
## 4007 9225
mytable4 / sum(mytable4) * 100
## IsTouristDestination
## 0 1
## 30.28265 69.71735
# Two-way contingency table: free breakfast (rows) by star rating (columns).
mytable <- xtabs(formula = ~ FreeBreakfast + StarRating, data = Cities.df)
mytable
## StarRating
## FreeBreakfast 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8
## 0 16 0 216 296 1789 0 8 0 661 8 0 8
## 1 0 8 224 336 4164 8 8 8 1091 0 24 8
## StarRating
## FreeBreakfast 3.9 4 4.1 4.3 4.4 4.5 4.7 4.8 5
## 0 16 783 0 16 0 224 8 0 594
## 1 16 1680 24 0 8 152 0 16 814
# Each cell as a percentage of all records.
100 * prop.table(mytable)
## StarRating
## FreeBreakfast 0 1 2 2.5 3
## 0 0.12091898 0.00000000 1.63240629 2.23700121 13.52025393
## 1 0.00000000 0.06045949 1.69286578 2.53929867 31.46916566
## StarRating
## FreeBreakfast 3.2 3.3 3.4 3.5 3.6
## 0 0.00000000 0.06045949 0.00000000 4.99546554 0.06045949
## 1 0.06045949 0.06045949 0.06045949 8.24516324 0.00000000
## StarRating
## FreeBreakfast 3.7 3.8 3.9 4 4.1
## 0 0.00000000 0.06045949 0.12091898 5.91747279 0.00000000
## 1 0.18137848 0.06045949 0.12091898 12.69649335 0.18137848
## StarRating
## FreeBreakfast 4.3 4.4 4.5 4.7 4.8
## 0 0.12091898 0.00000000 1.69286578 0.06045949 0.00000000
## 1 0.00000000 0.06045949 1.14873035 0.00000000 0.12091898
## StarRating
## FreeBreakfast 5
## 0 4.48911729
## 1 6.15175333
# Row totals (by FreeBreakfast).
margin.table(mytable, margin = 1)
## FreeBreakfast
## 0 1
## 4643 8589
# Column totals (by StarRating).
margin.table(mytable, margin = 2)
## StarRating
## 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4 4.1
## 16 8 440 632 5953 8 16 8 1752 8 24 16 32 2463 24
## 4.3 4.4 4.5 4.7 4.8 5
## 16 8 376 8 16 1408
# Two-way contingency table: metro-city flag (rows) by star rating (columns).
mytable1 <- xtabs(formula = ~ IsMetroCity + StarRating, data = Cities.df)
mytable1
## StarRating
## IsMetroCity 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8
## 0 16 8 344 456 4336 8 16 8 1312 0 24 16
## 1 0 0 96 176 1617 0 0 0 440 8 0 0
## StarRating
## IsMetroCity 3.9 4 4.1 4.3 4.4 4.5 4.7 4.8 5
## 0 32 1696 24 16 8 288 8 16 840
## 1 0 767 0 0 0 88 0 0 568
# Each cell as a percentage of all records.
100 * prop.table(mytable1)
## StarRating
## IsMetroCity 0 1 2 2.5 3
## 0 0.12091898 0.06045949 2.59975816 3.44619105 32.76904474
## 1 0.00000000 0.00000000 0.72551391 1.33010883 12.22037485
## StarRating
## IsMetroCity 3.2 3.3 3.4 3.5 3.6
## 0 0.06045949 0.12091898 0.06045949 9.91535671 0.00000000
## 1 0.00000000 0.00000000 0.00000000 3.32527207 0.06045949
## StarRating
## IsMetroCity 3.7 3.8 3.9 4 4.1
## 0 0.18137848 0.12091898 0.24183797 12.81741233 0.18137848
## 1 0.00000000 0.00000000 0.00000000 5.79655381 0.00000000
## StarRating
## IsMetroCity 4.3 4.4 4.5 4.7 4.8
## 0 0.12091898 0.06045949 2.17654172 0.06045949 0.12091898
## 1 0.00000000 0.00000000 0.66505441 0.00000000 0.00000000
## StarRating
## IsMetroCity 5
## 0 6.34824667
## 1 4.29262394
# Row totals (by IsMetroCity).
margin.table(mytable1, margin = 1)
## IsMetroCity
## 0 1
## 9472 3760
# Column totals (by StarRating).
margin.table(mytable1, margin = 2)
## StarRating
## 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4 4.1
## 16 8 440 632 5953 8 16 8 1752 8 24 16 32 2463 24
## 4.3 4.4 4.5 4.7 4.8 5
## 16 8 376 8 16 1408
# Two-way contingency table: metro-city flag (rows) by tourist-destination
# flag (columns).
mytable2 <- xtabs(formula = ~ IsMetroCity + IsTouristDestination,
                  data = Cities.df)
mytable2
## IsTouristDestination
## IsMetroCity 0 1
## 0 3352 6120
## 1 655 3105
# Each cell as a percentage of all records.
100 * prop.table(mytable2)
## IsTouristDestination
## IsMetroCity 0 1
## 0 25.332527 46.251511
## 1 4.950121 23.465840
# Row totals (by IsMetroCity).
margin.table(mytable2, margin = 1)
## IsMetroCity
## 0 1
## 9472 3760
# Column totals (by IsTouristDestination).
margin.table(mytable2, margin = 2)
## IsTouristDestination
## 0 1
## 4007 9225
# Two-way contingency table: free Wi-Fi (rows) by free breakfast (columns).
mytable3 <- xtabs(formula = ~ FreeWifi + FreeBreakfast, data = Cities.df)
mytable3
## FreeBreakfast
## FreeWifi 0 1
## 0 606 375
## 1 4037 8214
# Each cell as a percentage of all records.
100 * prop.table(mytable3)
## FreeBreakfast
## FreeWifi 0 1
## 0 4.579807 2.834039
## 1 30.509371 62.076784
# Row totals (by FreeWifi).
margin.table(mytable3, margin = 1)
## FreeWifi
## 0 1
## 981 12251
# Column totals (by FreeBreakfast).
margin.table(mytable3, margin = 2)
## FreeBreakfast
## 0 1
## 4643 8589
# Box plots ----
# Spread of city rank and of city population over all records.
boxplot(Cities.df$CityRank, horizontal = TRUE, main = "Rank of the cities",
        col = "lightblue")
boxplot(Cities.df$Population, horizontal = TRUE, main = "Population",
        col = "orange")
# Room rent by weekend flag. With horizontal = TRUE the values run along the
# x axis, so xlab labels the rent and ylab labels the groups. The first box is
# grey rather than black because a black fill hides the default median line.
boxplot(RoomRent ~ IsWeekend, data = Cities.df, horizontal = TRUE,
        ylab = "IsWeekend(notWeekend=0,weekend=1)", xlab = "RoomRents",
        las = 1, main = "Analysis of room rent and weekend availability",
        col = c("grey70", "white"))
# Room rent by metro-city flag.
boxplot(RoomRent ~ IsMetroCity, data = Cities.df, horizontal = TRUE,
        ylab = "City(metro=1,other=0)", xlab = "Room rent",
        las = 1, main = "Analysis of type of city and room rent of hotels",
        col = c("red", "green"))
# Star rating by free-breakfast flag.
boxplot(StarRating ~ FreeBreakfast, data = Cities.df, horizontal = TRUE,
        ylab = "breakfast availability", xlab = "Star ratings",
        las = 1, main = "Analysis of star rating and breakfast availability",
        col = c("pink", "yellow"))

# Histograms ----
# Population is plotted once; the second, otherwise identical call that only
# used a different fill colour was dropped.
hist(Cities.df$Population, main = "Population", xlab = "Population",
     col = "skyblue")
# freq = FALSE draws densities (bar areas sum to 1), so the y axis is labelled
# "Density" rather than "Relative frequency".
hist(Cities.df$RoomRent, main = "Analysis of room rents of hotels",
     xlab = "Rents of room", ylab = "Density", breaks = 30,
     col = "lightblue", freq = FALSE)
hist(Cities.df$StarRating, main = "Analysis of star ratings of hotels",
     xlab = "Star ratings", ylab = "Density", breaks = 30,
     col = "yellow", freq = FALSE)
hist(Cities.df$Airport, main = "Distance to nearest major airport",
     xlab = "Distance to nearest major Airport in km", col = "red")
hist(Cities.df$HotelCapacity, main = "Capacity of hotels",
     xlab = "Hotel Capacity", col = "black")
hist(Cities.df$CityRank, main = "Distribution of rank of cities",
     xlab = "City rank", col = "skyblue")
# Scatter plots (car::scatterplot) ----
# car is attached once here; the repeated library(car) calls before each plot
# were redundant.
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# NOTE(review): `spread` and `smoother.args` look like the scatterplot()
# arguments of car releases before 3.0; newer releases appear to take
# `smooth = list(...)` instead. Confirm against the installed car version.

# Room rent against star rating.
scatterplot(RoomRent ~ StarRating, data = Cities.df, spread = FALSE,
            smoother.args = list(lty = 2),
            main = "Scatter plot of Star Rating vs Room rent",
            ylab = "Room Rent", xlab = "Star Rating")
# Room rent against city rank.
scatterplot(RoomRent ~ CityRank, data = Cities.df, spread = FALSE,
            smoother.args = list(lty = 2),
            main = "Scatter plot of City rank vs Room rent",
            ylab = "Room Rent", xlab = "Rank of City")
# Room rent against the swimming-pool flag (0/1).
scatterplot(RoomRent ~ HasSwimmingPool, data = Cities.df, spread = FALSE,
            smoother.args = list(lty = 2),
            main = "Scatter plot of hotel with Swimming pool vs Room rent",
            ylab = "Room Rent", xlab = "Hotel with swimming pool")
# City rank against population.
scatterplot(x = Cities.df$Population, y = Cities.df$CityRank,
            main = "Population Vs City Rank ",
            xlab = "Population", ylab = "City rank")
# Correlogram of the data set: shaded cells below the diagonal, pie glyphs
# above it, and variable names on the diagonal.
library(corrgram)
corrgram(
  Cities.df,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of all variables"
)
# Correlation matrix of the main numeric variables. The columns are picked by
# name; these are the columns at positions 2-7, 10, 11 and 18 of the data set.
major <- Cities.df[, c(
  "Population", "CityRank", "IsMetroCity", "IsTouristDestination",
  "IsWeekend", "IsNewYearEve", "RoomRent", "StarRating", "HotelCapacity"
)]
cor(major)
## Population CityRank IsMetroCity
## Population 1.0000000000 -0.8353204432 0.7712260105
## CityRank -0.8353204432 1.0000000000 -0.5643937903
## IsMetroCity 0.7712260105 -0.5643937903 1.0000000000
## IsTouristDestination -0.0482029722 0.2807134520 0.1763717063
## IsWeekend 0.0115926802 -0.0072564766 0.0018118005
## IsNewYearEve 0.0007332482 -0.0006326444 0.0006464753
## RoomRent -0.0887280632 0.0939855292 -0.0668397705
## StarRating 0.1341365933 -0.1333810133 0.0776028661
## HotelCapacity 0.2599830516 -0.2561197059 0.1871502153
## IsTouristDestination IsWeekend IsNewYearEve
## Population -0.048202972 0.011592680 0.0007332482
## CityRank 0.280713452 -0.007256477 -0.0006326444
## IsMetroCity 0.176371706 0.001811801 0.0006464753
## IsTouristDestination 1.000000000 -0.019481101 -0.0022663884
## IsWeekend -0.019481101 1.000000000 0.2923820508
## IsNewYearEve -0.002266388 0.292382051 1.0000000000
## RoomRent 0.122502963 0.004580134 0.0384912269
## StarRating -0.040554998 0.006378436 0.0023608970
## HotelCapacity -0.094356091 0.006306507 0.0013526790
## RoomRent StarRating HotelCapacity
## Population -0.088728063 0.134136593 0.259983052
## CityRank 0.093985529 -0.133381013 -0.256119706
## IsMetroCity -0.066839771 0.077602866 0.187150215
## IsTouristDestination 0.122502963 -0.040554998 -0.094356091
## IsWeekend 0.004580134 0.006378436 0.006306507
## IsNewYearEve 0.038491227 0.002360897 0.001352679
## RoomRent 1.000000000 0.369373425 0.157873308
## StarRating 0.369373425 1.000000000 0.637430337
## HotelCapacity 0.157873308 0.637430337 1.000000000
# Pearson correlation test between room rent and star rating.
# The arguments are left as `Cities.df$...` expressions because cor.test()
# prints them verbatim in the "data:" line of its output.
cor.test(Cities.df$RoomRent, Cities.df$StarRating)
##
## Pearson's product-moment correlation
##
## data: Cities.df$RoomRent and Cities.df$StarRating
## t = 45.719, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3545660 0.3839956
## sample estimates:
## cor
## 0.3693734
# Pearson correlation test between room rent and the metro-city flag (0/1).
cor.test(Cities.df$RoomRent, Cities.df$IsMetroCity)
##
## Pearson's product-moment correlation
##
## data: Cities.df$RoomRent and Cities.df$IsMetroCity
## t = -7.7053, df = 13230, p-value = 1.399e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08378329 -0.04985761
## sample estimates:
## cor
## -0.06683977
# Pearson correlation test between room rent and city rank.
cor.test(Cities.df$RoomRent, Cities.df$CityRank)
##
## Pearson's product-moment correlation
##
## data: Cities.df$RoomRent and Cities.df$CityRank
## t = 10.858, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.07707001 0.11084696
## sample estimates:
## cor
## 0.09398553
# Pearson correlation test between room rent and the New Year's Eve flag (0/1).
cor.test(Cities.df$RoomRent, Cities.df$IsNewYearEve)
##
## Pearson's product-moment correlation
##
## data: Cities.df$RoomRent and Cities.df$IsNewYearEve
## t = 4.4306, df = 13230, p-value = 9.472e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02146637 0.05549377
## sample estimates:
## cor
## 0.03849123
# Null hypothesis: there is no difference between the mean room rent on
# weekdays and on weekends.
# Two-sample t-test of RoomRent grouped by IsWeekend (the recorded output below
# reports a Welch test).
t.test(Cities.df$RoomRent ~ Cities.df$IsWeekend)
##
## Welch Two Sample t-test
##
## data: Cities.df$RoomRent by Cities.df$IsWeekend
## t = -0.51853, df = 9999.4, p-value = 0.6041
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -331.2427 192.6559
## sample estimates:
## mean in group 0 mean in group 1
## 5430.835 5500.129
# Conclusion: there is no significant difference between the room rents on weekdays and on weekends (p = 0.6041).
# Null hypothesis: there is no difference between the mean room rent on
# New Year's Eve and on other days.
# Two-sample t-test of RoomRent grouped by IsNewYearEve.
t.test(Cities.df$RoomRent ~ Cities.df$IsNewYearEve)
##
## Welch Two Sample t-test
##
## data: Cities.df$RoomRent by Cities.df$IsNewYearEve
## t = -4.1793, df = 2065, p-value = 3.046e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1256.5297 -453.9099
## sample estimates:
## mean in group 0 mean in group 1
## 5367.606 6222.826
# Conclusion: there is a significant difference between the room rent on New Year's Eve and on other days (p = 3.046e-05); the mean rent is higher on New Year's Eve.
# Null hypothesis: there is no difference between the mean room rent in metro
# cities and in other cities.
# Two-sample t-test of RoomRent grouped by IsMetroCity.
t.test(Cities.df$RoomRent ~ Cities.df$IsMetroCity)
##
## Welch Two Sample t-test
##
## data: Cities.df$RoomRent by Cities.df$IsMetroCity
## t = 10.721, df = 13224, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 888.0308 1285.4102
## sample estimates:
## mean in group 0 mean in group 1
## 5782.794 4696.073
# Conclusion: there is a significant difference between the room rent in metro cities and in other cities (p < 2.2e-16); the mean rent is lower in metro cities.
# Null hypothesis: there is no difference between the mean room rent of rooms
# with free Wi-Fi and of rooms without it.
# Two-sample t-test of RoomRent grouped by FreeWifi.
t.test(Cities.df$RoomRent ~ Cities.df$FreeWifi)
##
## Welch Two Sample t-test
##
## data: Cities.df$RoomRent by Cities.df$FreeWifi
## t = -0.76847, df = 1804.7, p-value = 0.4423
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -360.5977 157.5701
## sample estimates:
## mean in group 0 mean in group 1
## 5380.004 5481.518
# Conclusion: there is no significant difference between the room rent of rooms with free Wi-Fi and of other rooms (p = 0.4423).
# Null hypothesis: there is no difference between the mean room rent of rooms
# with free breakfast and of rooms without it.
# Two-sample t-test of RoomRent grouped by FreeBreakfast.
t.test(Cities.df$RoomRent ~ Cities.df$FreeBreakfast)
##
## Welch Two Sample t-test
##
## data: Cities.df$RoomRent by Cities.df$FreeBreakfast
## t = 0.98095, df = 6212.3, p-value = 0.3267
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -153.5017 460.9935
## sample estimates:
## mean in group 0 mean in group 1
## 5573.790 5420.044
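# Conclusion: there is no significant difference between the room rents with and without free breakfast (p = 0.3267); the test does not show that the rents are the same, only that no difference was detected.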