email: recs.cse1638@gmail.com College: Rajkiya Engg. College Sonbhadra
The dimensions of the data set are 13232 rows and 19 columns
# Size of the data set as an integer pair: number of rows, number of columns.
c(nrow(hotel.df), ncol(hotel.df))
## [1] 13232 19
Descriptive statistics (min, max, median, etc.) of each variable.
library(psych)
# Per-column summary of hotel.df: n, mean, sd, median, trimmed mean, mad,
# min, max, range, skew, kurtosis and standard error.
# NOTE(review): columns printed with a trailing "*" look like non-numeric
# columns that were recoded to numbers, so their statistics are probably not
# meaningful -- confirm against the psych::describe documentation.
psych::describe(x = hotel.df)
## vars n mean sd median trimmed
## CityName* 1 13232 18.07 11.72 16 17.29
## Population 2 13232 4416836.87 4258386.00 3046163 4040816.22
## CityRank 3 13232 14.83 13.51 9 13.30
## IsMetroCity 4 13232 0.28 0.45 0 0.23
## IsTouristDestination 5 13232 0.70 0.46 1 0.75
## IsWeekend 6 13232 0.62 0.48 1 0.65
## IsNewYearEve 7 13232 0.12 0.33 0 0.03
## Date* 8 13232 14.30 2.69 14 14.39
## HotelName* 9 13232 841.19 488.16 827 841.18
## RoomRent 10 13232 5473.99 7333.12 4000 4383.33
## StarRating 11 13232 3.46 0.76 3 3.40
## Airport 12 13232 21.16 22.76 15 16.39
## HotelAddress* 13 13232 1202.53 582.17 1261 1233.25
## HotelPincode 14 13232 397430.26 259837.50 395003 388540.47
## HotelDescription* 15 13224 581.34 363.26 567 575.37
## FreeWifi 16 13232 0.93 0.26 1 1.00
## FreeBreakfast 17 13232 0.65 0.48 1 0.69
## HotelCapacity 18 13232 62.51 76.66 34 46.03
## HasSwimmingPool 19 13232 0.36 0.48 0 0.32
## mad min max range skew
## CityName* 11.86 1.0 42 41.0 0.48
## Population 3846498.95 8096.0 12442373 12434277.0 0.68
## CityRank 11.86 0.0 44 44.0 0.69
## IsMetroCity 0.00 0.0 1 1.0 0.96
## IsTouristDestination 0.00 0.0 1 1.0 -0.86
## IsWeekend 0.00 0.0 1 1.0 -0.51
## IsNewYearEve 0.00 0.0 1 1.0 2.28
## Date* 2.97 1.0 20 19.0 -0.77
## HotelName* 641.97 1.0 1670 1669.0 0.01
## RoomRent 2653.85 299.0 322500 322201.0 16.75
## StarRating 0.74 0.0 5 5.0 0.48
## Airport 11.12 0.2 124 123.8 2.73
## HotelAddress* 668.65 1.0 2108 2107.0 -0.37
## HotelPincode 257975.37 100025.0 7000157 6900132.0 9.99
## HotelDescription* 472.95 1.0 1226 1225.0 0.11
## FreeWifi 0.00 0.0 1 1.0 -3.25
## FreeBreakfast 0.00 0.0 1 1.0 -0.62
## HotelCapacity 28.17 0.0 600 600.0 2.95
## HasSwimmingPool 0.00 0.0 1 1.0 0.60
## kurtosis se
## CityName* -0.88 0.10
## Population -1.08 37019.65
## CityRank -0.76 0.12
## IsMetroCity -1.08 0.00
## IsTouristDestination -1.26 0.00
## IsWeekend -1.74 0.00
## IsNewYearEve 3.18 0.00
## Date* 1.92 0.02
## HotelName* -1.25 4.24
## RoomRent 582.06 63.75
## StarRating 0.25 0.01
## Airport 7.89 0.20
## HotelAddress* -0.88 5.06
## HotelPincode 249.76 2258.86
## HotelDescription* -1.25 3.16
## FreeWifi 8.57 0.00
## FreeBreakfast -1.61 0.00
## HotelCapacity 11.39 0.67
## HasSwimmingPool -1.64 0.00
One-way contingency tables for the categorical variables in the dataset.
# Number of rows for each distinct CityName value.
table(hotel.df[["CityName"]])
##
## Agra Ahmedabad Amritsar Bangalore
## 432 424 136 656
## Bhubaneswar Chandigarh Chennai Darjeeling
## 120 336 416 136
## Delhi Gangtok Goa Guwahati
## 2048 128 624 48
## Haridwar Hyderabad Indore Jaipur
## 48 536 160 768
## Jaisalmer Jodhpur Kanpur Kochi
## 264 224 16 608
## Kolkata Lucknow Madurai Manali
## 512 128 112 288
## Mangalore Mumbai Munnar Mysore
## 104 712 328 160
## Nainital Ooty Panchkula Pune
## 144 136 64 600
## Puri Rajkot Rishikesh Shimla
## 56 128 88 280
## Srinagar Surat Thiruvanthipuram Thrissur
## 40 80 392 32
## Udaipur Varanasi
## 456 264
# Put the columns of hotel.df on the search path. Any later call that refers
# to a column by its bare name appears to rely on this attach().
# NOTE(review): attach() is fragile -- a global variable with the same name
# silently masks the column. Prefer data = hotel.df, with(hotel.df, ...) or
# hotel.df$column in the individual calls.
attach(hotel.df)
# Number of rows for each Date value. The column is read from hotel.df
# explicitly so the call does not depend on attach(); naming the argument
# keeps the "Date" header in the printed table.
# NOTE(review): the printed levels mix several date formats (for example
# "24-Dec-16", "Dec 24 2016" and "Jan 4 2017"), so one calendar day is split
# across several rows -- consider normalising Date before tabulating.
table(Date = hotel.df$Date)
## Date
## 04-Jan-16 04-Jan-17 08-Jan-16 08-Jan-17 18-Dec-16 21-Dec-16
## 31 13 31 13 44 44
## 24-Dec-16 25-Dec-16 28-Dec-16 31-Dec-16 Dec 18 2016 Dec 21 2016
## 44 44 44 44 1608 1611
## Dec 24 2016 Dec 25 2016 Dec 28 2016 Dec 31 2016 Jan 04 2017 Jan 08 2017
## 1611 1611 1611 1611 1548 1542
## Jan 4 2017 Jan 8 2017
## 60 67
# Number of rows for each StarRating value. The column is read from hotel.df
# explicitly so the call does not depend on attach(); naming the argument
# keeps the "StarRating" header in the printed table.
table(StarRating = hotel.df$StarRating)
## StarRating
## 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4 4.1
## 16 8 440 632 5953 8 16 8 1752 8 24 16 32 2463 24
## 4.3 4.4 4.5 4.7 4.8 5
## 16 8 376 8 16 1408
# Number of rows without (0) and with (1) FreeBreakfast. The column is read
# from hotel.df explicitly so the call does not depend on attach(); naming
# the argument keeps the "FreeBreakfast" header in the printed table.
table(FreeBreakfast = hotel.df$FreeBreakfast)
## FreeBreakfast
## 0 1
## 4643 8589
# Number of rows without (0) and with (1) FreeWifi. The column is read from
# hotel.df explicitly so the call does not depend on attach(); naming the
# argument keeps the "FreeWifi" header in the printed table.
table(FreeWifi = hotel.df$FreeWifi)
## FreeWifi
## 0 1
## 981 12251
# Number of rows without (0) and with (1) HasSwimmingPool. The column is read
# from hotel.df explicitly so the call does not depend on attach(); naming
# the argument keeps the "HasSwimmingPool" header in the printed table.
table(HasSwimmingPool = hotel.df$HasSwimmingPool)
## HasSwimmingPool
## 0 1
## 8524 4708
# Number of rows with IsMetroCity equal to 0 and to 1. The column is read
# from hotel.df explicitly so the call does not depend on attach(); naming
# the argument keeps the "IsMetroCity" header in the printed table.
table(IsMetroCity = hotel.df$IsMetroCity)
## IsMetroCity
## 0 1
## 9472 3760
Two-way contingency tables
# Cross-tabulation of HasSwimmingPool (rows) by StarRating (columns).
# data = hotel.df makes the formula resolve its variables in the data frame
# instead of relying on attach().
xtabs(~ HasSwimmingPool + StarRating, data = hotel.df)
## StarRating
## HasSwimmingPool 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7
## 0 8 8 392 616 5236 0 16 0 1272 0 0
## 1 8 0 48 16 717 8 0 8 480 8 24
## StarRating
## HasSwimmingPool 3.8 3.9 4 4.1 4.3 4.4 4.5 4.7 4.8 5
## 0 8 8 848 8 0 8 48 0 0 48
## 1 8 24 1615 16 16 0 328 8 16 1360
# Cross-tabulation of FreeWifi (rows) by StarRating (columns).
# data = hotel.df makes the formula resolve its variables in the data frame
# instead of relying on attach().
xtabs(~ FreeWifi + StarRating, data = hotel.df)
## StarRating
## FreeWifi 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9
## 0 0 0 80 104 336 0 0 0 96 0 0 0 0
## 1 16 8 360 528 5617 8 16 8 1656 8 24 16 32
## StarRating
## FreeWifi 4 4.1 4.3 4.4 4.5 4.7 4.8 5
## 0 231 0 0 0 24 0 0 110
## 1 2232 24 16 8 352 8 16 1298
# Cross-tabulation of FreeBreakfast (rows) by StarRating (columns).
# data = hotel.df makes the formula resolve its variables in the data frame
# instead of relying on attach().
xtabs(~ FreeBreakfast + StarRating, data = hotel.df)
## StarRating
## FreeBreakfast 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8
## 0 16 0 216 296 1789 0 8 0 661 8 0 8
## 1 0 8 224 336 4164 8 8 8 1091 0 24 8
## StarRating
## FreeBreakfast 3.9 4 4.1 4.3 4.4 4.5 4.7 4.8 5
## 0 16 783 0 16 0 224 8 0 594
## 1 16 1680 24 0 8 152 0 16 814
# Row percentages of FreeWifi within each IsMetroCity group: margin = 1
# divides every cell by its row total, and * 100 turns the proportions into
# percentages. data = hotel.df removes the dependency on attach().
prop.table(
  xtabs(~ IsMetroCity + FreeWifi, data = hotel.df),
  margin = 1
) * 100
## FreeWifi
## IsMetroCity 0 1
## 0 8.847128 91.152872
## 1 3.803191 96.196809
Boxplot
# Horizontal box plot of RoomRent. The column is read from hotel.df directly
# (no attach() needed) and the plot is given a title so it can be identified
# when viewed on its own.
boxplot(hotel.df$RoomRent, horizontal = TRUE, col = "green",
        main = "Room rent")
Distance from the airport
# Horizontal box plot of the Airport column. The column is read from hotel.df
# directly (no attach() needed) and the plot is given a title so it can be
# identified when viewed on its own.
boxplot(hotel.df$Airport, horizontal = TRUE, col = "red",
        main = "Distance from the airport")
Hotel Capacity
# Horizontal box plot of HotelCapacity. The column is read from hotel.df
# directly (no attach() needed) and the plot is given a title so it can be
# identified when viewed on its own.
boxplot(hotel.df$HotelCapacity, horizontal = TRUE, col = "blue",
        main = "Hotel capacity")
Histograms
# Frequency of the 0/1 indicator IsMetroCity. Explicit breaks centre one bar
# on 0 and one on 1; the default binning would cut [0, 1] into many bins that
# are empty apart from the two ends. The column is read from hotel.df
# directly, so main/xlab are set to keep the original labels.
hist(hotel.df$IsMetroCity, breaks = c(-0.5, 0.5, 1.5), ylim = c(0, 10000),
     main = "Histogram of IsMetroCity", xlab = "IsMetroCity")
Room rent
# Histogram of RoomRent with 100 suggested bins. The column is read from
# hotel.df directly (no attach() needed); main is set explicitly so the
# title stays "Histogram of RoomRent".
# NOTE: xlim stops at 100000 while the summary statistics report a maximum
# rent of 322500, so the far right tail is deliberately cut off from view.
hist(hotel.df$RoomRent, breaks = 100, xlim = c(0, 100000),
     main = "Histogram of RoomRent", xlab = "Rent", ylab = "no of rooms")
Room rent vs airport
# Scatter plot of room rent against the Airport column. RoomRent is the
# quantity being explained, so it goes on the y-axis (formula left-hand
# side); the original call had the two axes swapped. data = hotel.df removes
# the dependency on attach().
plot(RoomRent ~ Airport, data = hotel.df)
Room rent vs hotel capacity
# Scatter plot of room rent against hotel capacity. RoomRent is the quantity
# being explained, so it goes on the y-axis (formula left-hand side); the
# original call had the two axes swapped. data = hotel.df removes the
# dependency on attach().
plot(RoomRent ~ HotelCapacity, data = hotel.df)
Room rent vs star rating
# Scatter plot of room rent against star rating. RoomRent is the quantity
# being explained, so it goes on the y-axis (formula left-hand side); the
# original call had the two axes swapped. data = hotel.df removes the
# dependency on attach().
plot(RoomRent ~ StarRating, data = hotel.df)
Correlation Matrix
# Pearson correlation matrix of the numeric columns. The columns are picked
# by name (same columns, same order as positions 2:7, 10:12, 16:19) so the
# selection does not silently change if hotel.df's column order changes.
cor(hotel.df[, c(
  "Population", "CityRank", "IsMetroCity", "IsTouristDestination",
  "IsWeekend", "IsNewYearEve", "RoomRent", "StarRating", "Airport",
  "FreeWifi", "FreeBreakfast", "HotelCapacity", "HasSwimmingPool"
)])
## Population CityRank IsMetroCity
## Population 1.0000000000 -0.8353204432 0.7712260105
## CityRank -0.8353204432 1.0000000000 -0.5643937903
## IsMetroCity 0.7712260105 -0.5643937903 1.0000000000
## IsTouristDestination -0.0482029722 0.2807134520 0.1763717063
## IsWeekend 0.0115926802 -0.0072564766 0.0018118005
## IsNewYearEve 0.0007332482 -0.0006326444 0.0006464753
## RoomRent -0.0887280632 0.0939855292 -0.0668397705
## StarRating 0.1341365933 -0.1333810133 0.0776028661
## Airport -0.2597010198 0.5059119892 -0.2073586125
## FreeWifi 0.1129334410 -0.1214309404 0.0868288677
## FreeBreakfast 0.0364278235 -0.0086837497 0.0513856623
## HotelCapacity 0.2599830516 -0.2561197059 0.1871502153
## HasSwimmingPool 0.0262590820 -0.1029737518 0.0214119243
## IsTouristDestination IsWeekend IsNewYearEve
## Population -0.048202972 0.011592680 7.332482e-04
## CityRank 0.280713452 -0.007256477 -6.326444e-04
## IsMetroCity 0.176371706 0.001811801 6.464753e-04
## IsTouristDestination 1.000000000 -0.019481101 -2.266388e-03
## IsWeekend -0.019481101 1.000000000 2.923821e-01
## IsNewYearEve -0.002266388 0.292382051 1.000000e+00
## RoomRent 0.122502963 0.004580134 3.849123e-02
## StarRating -0.040554998 0.006378436 2.360897e-03
## Airport 0.194422049 -0.002724756 4.598872e-04
## FreeWifi -0.061568821 0.002960828 2.787472e-05
## FreeBreakfast -0.071692559 -0.007612777 -2.606416e-03
## HotelCapacity -0.094356091 0.006306507 1.352679e-03
## HasSwimmingPool 0.042156280 0.004500461 1.122308e-03
## RoomRent StarRating Airport FreeWifi
## Population -0.088728063 0.134136593 -0.2597010198 1.129334e-01
## CityRank 0.093985529 -0.133381013 0.5059119892 -1.214309e-01
## IsMetroCity -0.066839771 0.077602866 -0.2073586125 8.682887e-02
## IsTouristDestination 0.122502963 -0.040554998 0.1944220492 -6.156882e-02
## IsWeekend 0.004580134 0.006378436 -0.0027247555 2.960828e-03
## IsNewYearEve 0.038491227 0.002360897 0.0004598872 2.787472e-05
## RoomRent 1.000000000 0.369373425 0.0496532442 3.627002e-03
## StarRating 0.369373425 1.000000000 -0.0609191837 1.800959e-02
## Airport 0.049653244 -0.060919184 1.0000000000 -9.452368e-02
## FreeWifi 0.003627002 0.018009594 -0.0945236768 1.000000e+00
## FreeBreakfast -0.010006370 -0.032892463 0.0242839409 1.582206e-01
## HotelCapacity 0.157873308 0.637430337 -0.1176720722 -8.703612e-03
## HasSwimmingPool 0.311657734 0.618214699 -0.1416665606 -2.407405e-02
## FreeBreakfast HotelCapacity HasSwimmingPool
## Population 0.036427824 0.259983052 0.026259082
## CityRank -0.008683750 -0.256119706 -0.102973752
## IsMetroCity 0.051385662 0.187150215 0.021411924
## IsTouristDestination -0.071692559 -0.094356091 0.042156280
## IsWeekend -0.007612777 0.006306507 0.004500461
## IsNewYearEve -0.002606416 0.001352679 0.001122308
## RoomRent -0.010006370 0.157873308 0.311657734
## StarRating -0.032892463 0.637430337 0.618214699
## Airport 0.024283941 -0.117672072 -0.141666561
## FreeWifi 0.158220597 -0.008703612 -0.024074046
## FreeBreakfast 1.000000000 -0.087165446 -0.061522132
## HotelCapacity -0.087165446 1.000000000 0.509045809
## HasSwimmingPool -0.061522132 0.509045809 1.000000000
Corrgram
library(corrgram)
# Correlogram drawn from the Pearson correlation matrix of the numeric
# columns. The columns are picked by name (same columns, same order as
# positions 2:7, 10:12, 16:19) so the selection does not silently change if
# hotel.df's column order changes.
corrgram(x = cor(hotel.df[, c(
  "Population", "CityRank", "IsMetroCity", "IsTouristDestination",
  "IsWeekend", "IsNewYearEve", "RoomRent", "StarRating", "Airport",
  "FreeWifi", "FreeBreakfast", "HotelCapacity", "HasSwimmingPool"
)]))
Scatter plot matrix
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatter plot matrix of the numeric columns. scatterplotMatrix() needs the
# observations themselves; the original call passed cor(...) and therefore
# plotted the 13 correlation coefficients of each column against each other
# instead of the hotel data. Columns are picked by name so the selection
# does not depend on column order.
# NOTE(review): 13 variables over all rows is a large plot -- consider a
# smaller set of columns if rendering is too slow.
scatterplotMatrix(x = hotel.df[, c(
  "Population", "CityRank", "IsMetroCity", "IsTouristDestination",
  "IsWeekend", "IsNewYearEve", "RoomRent", "StarRating", "Airport",
  "FreeWifi", "FreeBreakfast", "HotelCapacity", "HasSwimmingPool"
)])
Pearson's Correlation Test
# Pearson correlation test between IsMetroCity and RoomRent. with() looks
# the columns up in hotel.df, so the call no longer depends on attach(),
# while the "data:" line of the printed result keeps the bare column names.
with(hotel.df, cor.test(IsMetroCity, RoomRent))
##
## Pearson's product-moment correlation
##
## data: IsMetroCity and RoomRent
## t = -7.7053, df = 13230, p-value = 1.399e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08378329 -0.04985761
## sample estimates:
## cor
## -0.06683977
# Pearson correlation test between RoomRent and StarRating. with() looks the
# columns up in hotel.df, so the call no longer depends on attach(), while
# the "data:" line of the printed result keeps the bare column names.
with(hotel.df, cor.test(RoomRent, StarRating))
##
## Pearson's product-moment correlation
##
## data: RoomRent and StarRating
## t = 45.719, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3545660 0.3839956
## sample estimates:
## cor
## 0.3693734
# Pearson correlation test between IsWeekend and RoomRent. with() looks the
# columns up in hotel.df, so the call no longer depends on attach(), while
# the "data:" line of the printed result keeps the bare column names.
with(hotel.df, cor.test(IsWeekend, RoomRent))
##
## Pearson's product-moment correlation
##
## data: IsWeekend and RoomRent
## t = 0.52682, df = 13230, p-value = 0.5983
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01245978 0.02161739
## sample estimates:
## cor
## 0.004580134
# Pearson correlation test between IsTouristDestination and RoomRent. with()
# looks the columns up in hotel.df, so the call no longer depends on
# attach(), while the "data:" line of the printed result keeps the bare
# column names.
with(hotel.df, cor.test(IsTouristDestination, RoomRent))
##
## Pearson's product-moment correlation
##
## data: IsTouristDestination and RoomRent
## t = 14.197, df = 13230, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1056846 0.1392512
## sample estimates:
## cor
## 0.122503
T-Test
# Two-sample t-test on the means of RoomRent (x) and StarRating (y).
# NOTE(review): the two columns are on different scales, so rejecting
# "equal means" only says the average rent is not the average rating; it is
# not evidence that rent depends on rating. A correlation test or regression
# is the usual tool for that question -- confirm the intent.
t.test(x = hotel.df$RoomRent, y = hotel.df$StarRating)
##
## Welch Two Sample t-test
##
## data: hotel.df$RoomRent and hotel.df$StarRating
## t = 85.813, df = 13231, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 5345.575 5595.491
## sample estimates:
## mean of x mean of y
## 5473.991838 3.458933
# Two-sample t-test on the means of RoomRent (x) and HotelCapacity (y).
# NOTE(review): the two columns are on different scales, so rejecting
# "equal means" only says the average rent is not the average capacity; it
# is not evidence that rent depends on capacity. A correlation test or
# regression is the usual tool for that question -- confirm the intent.
t.test(x = hotel.df$RoomRent, y = hotel.df$HotelCapacity)
##
## Welch Two Sample t-test
##
## data: hotel.df$RoomRent and hotel.df$HotelCapacity
## t = 84.882, df = 13234, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 5286.515 5536.445
## sample estimates:
## mean of x mean of y
## 5473.99184 62.51164
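# The p-value is < 2.2e-16 (< 0.05), so we reject the null hypothesis of
# equal means: the mean of RoomRent differs significantly from the mean of
# HotelCapacity. This t-test compares means only; evidence of a relationship
# between Room Rent and Hotel Capacity comes from their correlation
# (r = 0.158 in the correlation matrix above), not from this test.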