#TITLE: MANAGEMENT OF HOTELS IN INDIA
#NAME: Ayush Bose
#E-mail:boseayush384@gmail.com
#DATE: January 31,2018
#College: JIIT, Noida
#INTRODUCTION
#This project identifies the factors that matter the most.
#This dataset consists of data from different hotels located in different cities.
#This project is about the hotels in different cities of India.
#It shows the factors which impact the pricing of hotel rooms, such as whether the city
#is a tourist destination or whether the date falls on a weekend.
# Load the hotel pricing dataset from the working directory.
# stringsAsFactors = TRUE is spelled out because the read.csv() default changed
# to FALSE in R 4.0.0; the factor-style summaries recorded below (level counts
# for CityName, Date, HotelName, ...) rely on text columns being factors.
mydata.df <- read.csv("Cities42.csv", stringsAsFactors = TRUE)
# View() opens a data viewer, which is only useful in an interactive session,
# so it is skipped when the script is run in batch mode.
if (interactive()) {
  View(mydata.df)
}
# Per-column summary: quantiles for numeric columns, level counts for factors.
summary(mydata.df)
## CityName Population CityRank IsMetroCity
## Delhi :2048 Min. : 8096 Min. : 0.00 Min. :0.0000
## Jaipur : 768 1st Qu.: 744983 1st Qu.: 2.00 1st Qu.:0.0000
## Mumbai : 712 Median : 3046163 Median : 9.00 Median :0.0000
## Bangalore: 656 Mean : 4416837 Mean :14.83 Mean :0.2842
## Goa : 624 3rd Qu.: 8443675 3rd Qu.:24.00 3rd Qu.:1.0000
## Kochi : 608 Max. :12442373 Max. :44.00 Max. :1.0000
## (Other) :7816
## IsTouristDestination IsWeekend IsNewYearEve Date
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Dec 21 2016:1611
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 Dec 24 2016:1611
## Median :1.0000 Median :1.0000 Median :0.0000 Dec 25 2016:1611
## Mean :0.6972 Mean :0.6228 Mean :0.1244 Dec 28 2016:1611
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 Dec 31 2016:1611
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Dec 18 2016:1608
## (Other) :3569
## HotelName RoomRent StarRating
## Vivanta by Taj : 32 Min. : 299 Min. :0.000
## Goldfinch Hotel : 24 1st Qu.: 2436 1st Qu.:3.000
## OYO Rooms : 24 Median : 4000 Median :3.000
## The Gordon House Hotel: 24 Mean : 5474 Mean :3.459
## Apnayt Villa : 16 3rd Qu.: 6299 3rd Qu.:4.000
## Bentleys Hotel Colaba : 16 Max. :322500 Max. :5.000
## (Other) :13096
## Airport
## Min. : 0.20
## 1st Qu.: 8.40
## Median : 15.00
## Mean : 21.16
## 3rd Qu.: 24.00
## Max. :124.00
##
## HotelAddress
## The Mall, Shimla : 32
## #2-91/14/8, White Fields, Kondapur, Hitech City, Hyderabad, 500084 India: 16
## 121, City Terrace, Walchand Hirachand Marg, Mumbai, Maharashtra : 16
## 14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta : 16
## 144/7, Rajiv Gandi Salai (OMR), Kottivakkam, Chennai, Tamil Nadu : 16
## 17, Oliver Road, Colaba, Mumbai, Maharashtra : 16
## (Other) :13120
## HotelPincode HotelDescription FreeWifi FreeBreakfast
## Min. : 100025 3 : 120 Min. :0.0000 Min. :0.0000
## 1st Qu.: 221001 Abc : 112 1st Qu.:1.0000 1st Qu.:0.0000
## Median : 395003 3-star hotel: 104 Median :1.0000 Median :1.0000
## Mean : 397430 3.5 : 88 Mean :0.9259 Mean :0.6491
## 3rd Qu.: 570001 4 : 72 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :7000157 (Other) :12728 Max. :1.0000 Max. :1.0000
## NA's : 8
## HotelCapacity HasSwimmingPool
## Min. : 0.00 Min. :0.0000
## 1st Qu.: 16.00 1st Qu.:0.0000
## Median : 34.00 Median :0.0000
## Mean : 62.51 Mean :0.3558
## 3rd Qu.: 75.00 3rd Qu.:1.0000
## Max. :600.00 Max. :1.0000
##
# Put the columns of mydata.df on the search path so they can be referenced
# by bare name.
# NOTE(review): attach() is generally discouraged because attached columns can
# mask, or be masked by, other objects. The statements visible in this section
# all name mydata.df explicitly (via `$`, `data =` or with()), so this call may
# be removable - confirm nothing else in the file depends on it first.
attach(mydata.df)
# Show the first rows as a quick sanity check of the loaded data.
head(mydata.df)
## CityName Population CityRank IsMetroCity IsTouristDestination IsWeekend
## 1 Mumbai 12442373 0 1 1 1
## 2 Mumbai 12442373 0 1 1 0
## 3 Mumbai 12442373 0 1 1 1
## 4 Mumbai 12442373 0 1 1 1
## 5 Mumbai 12442373 0 1 1 0
## 6 Mumbai 12442373 0 1 1 1
## IsNewYearEve Date HotelName RoomRent StarRating Airport
## 1 0 Dec 18 2016 Vivanta by Taj 12375 5 21
## 2 0 Dec 21 2016 Vivanta by Taj 10250 5 21
## 3 0 Dec 24 2016 Vivanta by Taj 9900 5 21
## 4 0 Dec 25 2016 Vivanta by Taj 10350 5 21
## 5 0 Dec 28 2016 Vivanta by Taj 12000 5 21
## 6 1 Dec 31 2016 Vivanta by Taj 11475 5 21
## HotelAddress HotelPincode
## 1 90 Cuffe Parade, Colaba, Mumbai, Maharashtra 400005
## 2 91 Cuffe Parade, Colaba, Mumbai, Maharashtra 400006
## 3 92 Cuffe Parade, Colaba, Mumbai, Maharashtra 400007
## 4 93 Cuffe Parade, Colaba, Mumbai, Maharashtra 400008
## 5 94 Cuffe Parade, Colaba, Mumbai, Maharashtra 400009
## 6 95 Cuffe Parade, Colaba, Mumbai, Maharashtra 400010
## HotelDescription FreeWifi FreeBreakfast
## 1 Luxury hotel with spa, near Gateway of India 1 0
## 2 Luxury hotel with spa, near Gateway of India 1 0
## 3 Luxury hotel with spa, near Gateway of India 1 0
## 4 Luxury hotel with spa, near Gateway of India 1 0
## 5 Luxury hotel with spa, near Gateway of India 1 0
## 6 Luxury hotel with spa, near Gateway of India 1 0
## HotelCapacity HasSwimmingPool
## 1 287 1
## 2 287 1
## 3 287 1
## 4 287 1
## 5 287 1
## 6 287 1
# Size of the dataset as c(rows, columns).
c(nrow(mydata.df), ncol(mydata.df))
## [1] 13232 19
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# Descriptive statistics for every column (factor columns are marked with *
# in the printed output).
psych::describe(mydata.df)
## vars n mean sd median trimmed
## CityName* 1 13232 18.07 11.72 16 17.29
## Population 2 13232 4416836.87 4258386.00 3046163 4040816.22
## CityRank 3 13232 14.83 13.51 9 13.30
## IsMetroCity 4 13232 0.28 0.45 0 0.23
## IsTouristDestination 5 13232 0.70 0.46 1 0.75
## IsWeekend 6 13232 0.62 0.48 1 0.65
## IsNewYearEve 7 13232 0.12 0.33 0 0.03
## Date* 8 13232 14.30 2.69 14 14.39
## HotelName* 9 13232 841.19 488.16 827 841.18
## RoomRent 10 13232 5473.99 7333.12 4000 4383.33
## StarRating 11 13232 3.46 0.76 3 3.40
## Airport 12 13232 21.16 22.76 15 16.39
## HotelAddress* 13 13232 1202.53 582.17 1261 1233.25
## HotelPincode 14 13232 397430.26 259837.50 395003 388540.47
## HotelDescription* 15 13224 581.34 363.26 567 575.37
## FreeWifi 16 13232 0.93 0.26 1 1.00
## FreeBreakfast 17 13232 0.65 0.48 1 0.69
## HotelCapacity 18 13232 62.51 76.66 34 46.03
## HasSwimmingPool 19 13232 0.36 0.48 0 0.32
## mad min max range skew
## CityName* 11.86 1.0 42 41.0 0.48
## Population 3846498.95 8096.0 12442373 12434277.0 0.68
## CityRank 11.86 0.0 44 44.0 0.69
## IsMetroCity 0.00 0.0 1 1.0 0.96
## IsTouristDestination 0.00 0.0 1 1.0 -0.86
## IsWeekend 0.00 0.0 1 1.0 -0.51
## IsNewYearEve 0.00 0.0 1 1.0 2.28
## Date* 2.97 1.0 20 19.0 -0.77
## HotelName* 641.97 1.0 1670 1669.0 0.01
## RoomRent 2653.85 299.0 322500 322201.0 16.75
## StarRating 0.74 0.0 5 5.0 0.48
## Airport 11.12 0.2 124 123.8 2.73
## HotelAddress* 668.65 1.0 2108 2107.0 -0.37
## HotelPincode 257975.37 100025.0 7000157 6900132.0 9.99
## HotelDescription* 472.95 1.0 1226 1225.0 0.11
## FreeWifi 0.00 0.0 1 1.0 -3.25
## FreeBreakfast 0.00 0.0 1 1.0 -0.62
## HotelCapacity 28.17 0.0 600 600.0 2.95
## HasSwimmingPool 0.00 0.0 1 1.0 0.60
## kurtosis se
## CityName* -0.88 0.10
## Population -1.08 37019.65
## CityRank -0.76 0.12
## IsMetroCity -1.08 0.00
## IsTouristDestination -1.26 0.00
## IsWeekend -1.74 0.00
## IsNewYearEve 3.18 0.00
## Date* 1.92 0.02
## HotelName* -1.25 4.24
## RoomRent 582.06 63.75
## StarRating 0.25 0.01
## Airport 7.89 0.20
## HotelAddress* -0.88 5.06
## HotelPincode 249.76 2258.86
## HotelDescription* -1.25 3.16
## FreeWifi 8.57 0.00
## FreeBreakfast -1.61 0.00
## HotelCapacity 11.39 0.67
## HasSwimmingPool -1.64 0.00
#One way contingency table
# Number of observations at each star rating.
mytable <- table(StarRating = mydata.df$StarRating)
mytable
## StarRating
## 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4 4.1
## 16 8 440 632 5953 8 16 8 1752 8 24 16 32 2463 24
## 4.3 4.4 4.5 4.7 4.8 5
## 16 8 376 8 16 1408
# Number of observations with (1) and without (0) free breakfast.
mytable2 <- table(FreeBreakfast = mydata.df$FreeBreakfast)
mytable2
## FreeBreakfast
## 0 1
## 4643 8589
# Number of observations per city rank.
mytable3 <- table(CityRank = mydata.df$CityRank)
mytable3
## CityRank
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 712 2048 656 416 536 424 512 80 600 768 32 128 16 136 160
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## 432 448 624 128 264 40 224 336 392 48 160 120 272 104 456
## 32 33 34 35 36 37 38 39 40 42 43 44
## 48 56 280 64 136 88 128 136 264 144 328 288
#Two way contingency table
# Free breakfast (rows) cross-tabulated against star rating (columns).
mytable <- xtabs(
  formula = ~ FreeBreakfast + StarRating,
  data = mydata.df
)
mytable
## StarRating
## FreeBreakfast 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8
## 0 16 0 216 296 1789 0 8 0 661 8 0 8
## 1 0 8 224 336 4164 8 8 8 1091 0 24 8
## StarRating
## FreeBreakfast 3.9 4 4.1 4.3 4.4 4.5 4.7 4.8 5
## 0 16 783 0 16 0 224 8 0 594
## 1 16 1680 24 0 8 152 0 16 814
# Metro-city flag (rows) cross-tabulated against star rating (columns).
mytable1 <- xtabs(
  formula = ~ IsMetroCity + StarRating,
  data = mydata.df
)
mytable1
## StarRating
## IsMetroCity 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8
## 0 16 8 344 456 4336 8 16 8 1312 0 24 16
## 1 0 0 96 176 1617 0 0 0 440 8 0 0
## StarRating
## IsMetroCity 3.9 4 4.1 4.3 4.4 4.5 4.7 4.8 5
## 0 32 1696 24 16 8 288 8 16 840
## 1 0 767 0 0 0 88 0 0 568
# Metro-city flag (rows) cross-tabulated against tourist-destination flag.
mytable2 <- xtabs(
  formula = ~ IsMetroCity + IsTouristDestination,
  data = mydata.df
)
mytable2
## IsTouristDestination
## IsMetroCity 0 1
## 0 3352 6120
## 1 655 3105
#HISTOGRAMS
# Distribution of room rents. freq = FALSE makes hist() plot probability
# densities (total area 1), so the y axis is labelled "Density".
hist(mydata.df$RoomRent,
     main = "Analysis of room rents of hotels",
     xlab = "Rents of room", ylab = "Density",
     breaks = 30, col = "lightblue", freq = FALSE)

# Distribution of star ratings, also drawn on the density scale.
hist(mydata.df$StarRating,
     main = "Analysis of star ratings of hotels",
     xlab = "Star ratings", ylab = "Density",
     breaks = 30, col = "red", freq = FALSE)

# Distribution of hotel capacity (default count scale).
hist(mydata.df$HotelCapacity, main = "Hotel Capacity", xlab = "Capacity",
     col = "green")

# Distribution of city population across observations (default count scale).
hist(mydata.df$Population, main = "Population", xlab = "Population",
     col = "yellow")

#BOXPLOTS
# Spread of star ratings across all observations.
boxplot(mydata.df$StarRating, horizontal = TRUE, main = "Star Rating",
        col = "lightblue")

# City rank split by whether free breakfast is offered (0 = no, 1 = yes).
# The title names the two variables actually plotted; no wifi variable is
# involved in this chart.
boxplot(CityRank ~ FreeBreakfast, data = mydata.df, horizontal = TRUE,
        main = "City rank by free breakfast availability",
        xlab = "Rank of city", ylab = "Breakfast availability",
        las = 1, col = c("red", "pink"))

# Room rent split by the metro-city flag (0 = non-metro, 1 = metro).
boxplot(RoomRent ~ IsMetroCity, data = mydata.df, horizontal = TRUE,
        ylab = "City", xlab = "Room rent", las = 1,
        main = "Analysis of metro city and room rent of hotels",
        col = c("brown", "green"))

#SCATTER PLOT
library(car)
## Warning: package 'car' was built under R version 3.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Room rent (y) against star rating (x).
scatterplot(RoomRent ~ StarRating,
            data = mydata.df,
            xlab = "Star Rating",
            ylab = "Room Rent",
            main = "Scatter plot of Star Rating vs Room rent",
            smoother.args = list(lty = 2),
            spread = FALSE)

# Pairwise scatter plots of room rent, the weekend and New Year's Eve flags,
# and the Airport column.
scatterplotMatrix(formula = ~ RoomRent + IsWeekend + IsNewYearEve + Airport,
                  data = mydata.df,
                  pch = 16)
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth

# Same pair of variables with the axes swapped: star rating (y) against
# room rent (x).
scatterplot(x = mydata.df$RoomRent,
            y = mydata.df$StarRating,
            xlab = "Room Rent",
            ylab = "Star Rating",
            main = "Room Rent Vs Star Rating ")

#Corrgrams
# Pairwise Pearson correlations between the numeric city- and hotel-level
# variables. Columns are selected by name rather than by position so the
# selection does not silently change if the column order of the CSV changes.
cor(mydata.df[, c("Population", "CityRank", "IsMetroCity",
                  "IsTouristDestination", "IsWeekend", "IsNewYearEve",
                  "RoomRent", "StarRating", "HotelCapacity")])
## Population CityRank IsMetroCity
## Population 1.0000000000 -0.8353204432 0.7712260105
## CityRank -0.8353204432 1.0000000000 -0.5643937903
## IsMetroCity 0.7712260105 -0.5643937903 1.0000000000
## IsTouristDestination -0.0482029722 0.2807134520 0.1763717063
## IsWeekend 0.0115926802 -0.0072564766 0.0018118005
## IsNewYearEve 0.0007332482 -0.0006326444 0.0006464753
## RoomRent -0.0887280632 0.0939855292 -0.0668397705
## StarRating 0.1341365933 -0.1333810133 0.0776028661
## HotelCapacity 0.2599830516 -0.2561197059 0.1871502153
## IsTouristDestination IsWeekend IsNewYearEve
## Population -0.048202972 0.011592680 0.0007332482
## CityRank 0.280713452 -0.007256477 -0.0006326444
## IsMetroCity 0.176371706 0.001811801 0.0006464753
## IsTouristDestination 1.000000000 -0.019481101 -0.0022663884
## IsWeekend -0.019481101 1.000000000 0.2923820508
## IsNewYearEve -0.002266388 0.292382051 1.0000000000
## RoomRent 0.122502963 0.004580134 0.0384912269
## StarRating -0.040554998 0.006378436 0.0023608970
## HotelCapacity -0.094356091 0.006306507 0.0013526790
## RoomRent StarRating HotelCapacity
## Population -0.088728063 0.134136593 0.259983052
## CityRank 0.093985529 -0.133381013 -0.256119706
## IsMetroCity -0.066839771 0.077602866 0.187150215
## IsTouristDestination 0.122502963 -0.040554998 -0.094356091
## IsWeekend 0.004580134 0.006378436 0.006306507
## IsNewYearEve 0.038491227 0.002360897 0.001352679
## RoomRent 1.000000000 0.369373425 0.157873308
## StarRating 0.369373425 1.000000000 0.637430337
## HotelCapacity 0.157873308 0.637430337 1.000000000
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Corrgram of the dataset: shaded cells below the diagonal, pie glyphs above
# it, and variable names on the diagonal.
corrgram(
  mydata.df,
  main = "Corrgram of all variables",
  text.panel = panel.txt,
  upper.panel = panel.pie,
  lower.panel = panel.shade
)

#Correlation tests
# Two-sided Pearson correlation test between room rent and star rating
# (the defaults are written out explicitly).
cor.test(
  x = mydata.df$RoomRent,
  y = mydata.df$StarRating,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: mydata.df$RoomRent and mydata.df$StarRating
## t = 45.719, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3545660 0.3839956
## sample estimates:
## cor
## 0.3693734
# Two-sided Pearson correlation test between city population and city rank
# (the defaults are written out explicitly).
cor.test(
  x = mydata.df$Population,
  y = mydata.df$CityRank,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: mydata.df$Population and mydata.df$CityRank
## t = -174.77, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8403980 -0.8300962
## sample estimates:
## cor
## -0.8353204
# Two-sided Pearson correlation test between room rent and the New Year's Eve
# flag (the defaults are written out explicitly).
cor.test(
  x = mydata.df$RoomRent,
  y = mydata.df$IsNewYearEve,
  alternative = "two.sided",
  method = "pearson",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: mydata.df$RoomRent and mydata.df$IsNewYearEve
## t = 4.4306, df = 13230, p-value = 9.472e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.02146637 0.05549377
## sample estimates:
## cor
## 0.03849123
#Chi-square Test
# Chi-squared test on the RoomRent column alone.
# NOTE(review): with a single numeric vector, chisq.test() runs a
# goodness-of-fit test "for given probabilities" (see the recorded output),
# treating each rent value as if it were a count. That does not look like a
# meaningful hypothesis for a continuous price variable - confirm the intent;
# a test of independence on a contingency table of two categorical variables
# may have been meant.
chisq.test(mydata.df$RoomRent)
##
## Chi-squared test for given probabilities
##
## data: mydata.df$RoomRent
## X-squared = 129980000, df = 13231, p-value < 2.2e-16
#p-value < 2.2e-16 (<0.05). We can reject the Null Hypothesis and accept Alternate Hypothesis.
#T-test
#Null Hypothesis - There is no difference between the room rent on New Year's Eve
#and on other days
# Welch two-sample t-test (unequal variances, two-sided) comparing room rent
# on New Year's Eve against all other dates; the defaults are written out.
t.test(
  mydata.df$RoomRent ~ mydata.df$IsNewYearEve,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: mydata.df$RoomRent by mydata.df$IsNewYearEve
## t = -4.1793, df = 2065, p-value = 3.046e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1256.5297 -453.9099
## sample estimates:
## mean in group 0 mean in group 1
## 5367.606 6222.826
#P-value = 3.046e-05 (<0.05), which is small enough
#to reject the Null Hypothesis in favour of the Alternative Hypothesis.
#Hence there is a significant difference
#between the room rent on New Year's Eve and on other days.
#Null Hypothesis - There is no difference between the room rent on weekends
#and on other days
# Welch two-sample t-test (unequal variances, two-sided) comparing room rent
# on weekends against other days; the defaults are written out.
t.test(
  mydata.df$RoomRent ~ mydata.df$IsWeekend,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: mydata.df$RoomRent by mydata.df$IsWeekend
## t = -0.51853, df = 9999.4, p-value = 0.6041
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -331.2427 192.6559
## sample estimates:
## mean in group 0 mean in group 1
## 5430.835 5500.129
#P-value = 0.6041 (>0.05), so we fail to reject the Null Hypothesis.
#Hence there is no significant difference
#between the room rent on weekends and on other days.