# Switch to the folder that holds the data, read the Cities42 data set into
# `hot1`, open it in the data viewer and list its column names.
setwd("~/Desktop/5 SRM Kashish Mukheja/Downoad content")
hot1 <- read.csv(file = "Cities42.csv")
View(hot1)
names(hot1)
## [1] "CityName" "Population" "CityRank"
## [4] "IsMetroCity" "IsTouristDestination" "IsWeekend"
## [7] "IsNewYearEve" "Date" "HotelName"
## [10] "RoomRent" "StarRating" "Airport"
## [13] "HotelAddress" "HotelPincode" "HotelDescription"
## [16] "FreeWifi" "FreeBreakfast" "HotelCapacity"
## [19] "HasSwimmingPool"
# Size of the data set as (rows, columns).
c(nrow(hot1), ncol(hot1))
## [1] 13232 19
# Per-column descriptive statistics (n, mean, sd, median, skew, ...) via psych.
library(psych)
describe(x = hot1)
## vars n mean sd median trimmed
## CityName* 1 13232 18.07 11.72 16 17.29
## Population 2 13232 4416836.87 4258386.00 3046163 4040816.22
## CityRank 3 13232 14.83 13.51 9 13.30
## IsMetroCity 4 13232 0.28 0.45 0 0.23
## IsTouristDestination 5 13232 0.70 0.46 1 0.75
## IsWeekend 6 13232 0.62 0.48 1 0.65
## IsNewYearEve 7 13232 0.12 0.33 0 0.03
## Date* 8 13232 14.30 2.69 14 14.39
## HotelName* 9 13232 841.84 488.14 834 842.05
## RoomRent 10 13232 5473.99 7333.12 4000 4383.33
## StarRating 11 13232 3.46 0.76 3 3.40
## Airport 12 13232 21.16 22.76 15 16.39
## HotelAddress* 13 13232 1202.69 581.98 1261 1233.43
## HotelPincode 14 13232 397430.26 259837.50 395003 388540.47
## HotelDescription* 15 13224 581.40 363.01 570 575.79
## FreeWifi 16 13232 0.93 0.26 1 1.00
## FreeBreakfast 17 13232 0.65 0.48 1 0.69
## HotelCapacity 18 13232 62.51 76.66 34 46.03
## HasSwimmingPool 19 13232 0.36 0.48 0 0.32
## mad min max range skew
## CityName* 11.86 1.0 42 41.0 0.48
## Population 3846498.95 8096.0 12442373 12434277.0 0.68
## CityRank 11.86 0.0 44 44.0 0.69
## IsMetroCity 0.00 0.0 1 1.0 0.96
## IsTouristDestination 0.00 0.0 1 1.0 -0.86
## IsWeekend 0.00 0.0 1 1.0 -0.51
## IsNewYearEve 0.00 0.0 1 1.0 2.28
## Date* 2.97 1.0 20 19.0 -0.77
## HotelName* 644.93 1.0 1670 1669.0 0.00
## RoomRent 2653.85 299.0 322500 322201.0 16.75
## StarRating 0.74 0.0 5 5.0 0.48
## Airport 11.12 0.2 124 123.8 2.73
## HotelAddress* 668.65 1.0 2108 2107.0 -0.37
## HotelPincode 257975.37 100025.0 7000157 6900132.0 9.99
## HotelDescription* 465.54 1.0 1226 1225.0 0.10
## FreeWifi 0.00 0.0 1 1.0 -3.25
## FreeBreakfast 0.00 0.0 1 1.0 -0.62
## HotelCapacity 28.17 0.0 600 600.0 2.95
## HasSwimmingPool 0.00 0.0 1 1.0 0.60
## kurtosis se
## CityName* -0.88 0.10
## Population -1.08 37019.65
## CityRank -0.76 0.12
## IsMetroCity -1.08 0.00
## IsTouristDestination -1.26 0.00
## IsWeekend -1.74 0.00
## IsNewYearEve 3.18 0.00
## Date* 1.92 0.02
## HotelName* -1.26 4.24
## RoomRent 582.06 63.75
## StarRating 0.25 0.01
## Airport 7.89 0.20
## HotelAddress* -0.88 5.06
## HotelPincode 249.76 2258.86
## HotelDescription* -1.25 3.16
## FreeWifi 8.57 0.00
## FreeBreakfast -1.61 0.00
## HotelCapacity 11.39 0.67
## HasSwimmingPool -1.64 0.00
# Frequency table of IsMetroCity, shown in the viewer and as percentages
# of all rows (two decimals).
mytable1 <- table(IsMetroCity = hot1[["IsMetroCity"]])
View(mytable1)
round(100 * prop.table(mytable1), digits = 2)
## IsMetroCity
## 0 1
## 71.58 28.42
# Frequency table of IsTouristDestination, shown in the viewer and as
# percentages of all rows (two decimals).
mytable2 <- table(IsTouristDestination = hot1[["IsTouristDestination"]])
View(mytable2)
round(100 * prop.table(mytable2), digits = 2)
## IsTouristDestination
## 0 1
## 30.28 69.72
# Frequency table of IsWeekend, shown in the viewer and as percentages of
# all rows (two decimals).
mytable3 <- table(IsWeekend = hot1[["IsWeekend"]])
View(mytable3)
round(100 * prop.table(mytable3), digits = 2)
## IsWeekend
## 0 1
## 37.72 62.28
# Frequency table of IsNewYearEve, shown in the viewer and as percentages
# of all rows (two decimals).
mytable4 <- table(IsNewYearEve = hot1[["IsNewYearEve"]])
View(mytable4)
round(100 * prop.table(mytable4), digits = 2)
## IsNewYearEve
## 0 1
## 87.56 12.44
# Frequency table of StarRating (one cell per distinct rating value), shown
# in the viewer and as percentages of all rows (two decimals).
mytable5 <- table(StarRating = hot1[["StarRating"]])
View(mytable5)
round(100 * prop.table(mytable5), digits = 2)
## StarRating
## 0 1 2 2.5 3 3.2 3.3 3.4 3.5 3.6 3.7 3.8
## 0.12 0.06 3.33 4.78 44.99 0.06 0.12 0.06 13.24 0.06 0.18 0.12
## 3.9 4 4.1 4.3 4.4 4.5 4.7 4.8 5
## 0.24 18.61 0.18 0.12 0.06 2.84 0.06 0.12 10.64
# Frequency table of FreeWifi, shown in the viewer and as percentages of
# all rows (two decimals).
mytable6 <- table(FreeWifi = hot1[["FreeWifi"]])
View(mytable6)
round(100 * prop.table(mytable6), digits = 2)
## FreeWifi
## 0 1
## 7.41 92.59
# Frequency table of FreeBreakfast, shown in the viewer and as percentages
# of all rows (two decimals).
mytable7 <- table(FreeBreakfast = hot1[["FreeBreakfast"]])
View(mytable7)
round(100 * prop.table(mytable7), digits = 2)
## FreeBreakfast
## 0 1
## 35.09 64.91
# Frequency table of HasSwimmingPool, shown in the viewer and as
# percentages of all rows (two decimals).
mytable8 <- table(HasSwimmingPool = hot1[["HasSwimmingPool"]])
View(mytable8)
round(100 * prop.table(mytable8), digits = 2)
## HasSwimmingPool
## 0 1
## 64.42 35.58
# Frequency table of CityName, shown in the viewer and as each city's
# percentage share of all rows (two decimals).
mytable9 <- table(CityName = hot1[["CityName"]])
View(mytable9)
round(100 * prop.table(mytable9), digits = 2)
## CityName
## Agra Ahmedabad Amritsar Bangalore
## 3.26 3.20 1.03 4.96
## Bhubaneswar Chandigarh Chennai Darjeeling
## 0.91 2.54 3.14 1.03
## Delhi Gangtok Goa Guwahati
## 15.48 0.97 4.72 0.36
## Haridwar Hyderabad Indore Jaipur
## 0.36 4.05 1.21 5.80
## Jaisalmer Jodhpur Kanpur Kochi
## 2.00 1.69 0.12 4.59
## Kolkata Lucknow Madurai Manali
## 3.87 0.97 0.85 2.18
## Mangalore Mumbai Munnar Mysore
## 0.79 5.38 2.48 1.21
## Nainital Ooty Panchkula Pune
## 1.09 1.03 0.48 4.53
## Puri Rajkot Rishikesh Shimla
## 0.42 0.97 0.67 2.12
## Srinagar Surat Thiruvanthipuram Thrissur
## 0.30 0.60 2.96 0.24
## Udaipur Varanasi
## 3.45 2.00
# Two-way contingency table of IsMetroCity (rows) by IsTouristDestination
# (columns). Each cell reports the count, its chi-square contribution and the
# row / column / table proportions. The printed row and column labels are the
# argument expressions themselves, so keep the `hot1$...` form.
library(gmodels)
CrossTable(hot1$IsMetroCity,hot1$IsTouristDestination)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 13232
##
##
## | hot1$IsTouristDestination
## hot1$IsMetroCity | 0 | 1 | Row Total |
## -----------------|-----------|-----------|-----------|
## 0 | 3352 | 6120 | 9472 |
## | 81.543 | 35.419 | |
## | 0.354 | 0.646 | 0.716 |
## | 0.837 | 0.663 | |
## | 0.253 | 0.463 | |
## -----------------|-----------|-----------|-----------|
## 1 | 655 | 3105 | 3760 |
## | 205.419 | 89.226 | |
## | 0.174 | 0.826 | 0.284 |
## | 0.163 | 0.337 | |
## | 0.050 | 0.235 | |
## -----------------|-----------|-----------|-----------|
## Column Total | 4007 | 9225 | 13232 |
## | 0.303 | 0.697 | |
## -----------------|-----------|-----------|-----------|
##
##
# Two-way contingency table of IsWeekend (rows) by IsNewYearEve (columns),
# with counts, chi-square contributions and row / column / table proportions.
CrossTable(hot1$IsWeekend,hot1$IsNewYearEve)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 13232
##
##
## | hot1$IsNewYearEve
## hot1$IsWeekend | 0 | 1 | Row Total |
## ---------------|-----------|-----------|-----------|
## 0 | 4989 | 2 | 4991 |
## | 87.637 | 616.864 | |
## | 1.000 | 0.000 | 0.377 |
## | 0.431 | 0.001 | |
## | 0.377 | 0.000 | |
## ---------------|-----------|-----------|-----------|
## 1 | 6597 | 1644 | 8241 |
## | 53.075 | 373.592 | |
## | 0.801 | 0.199 | 0.623 |
## | 0.569 | 0.999 | |
## | 0.499 | 0.124 | |
## ---------------|-----------|-----------|-----------|
## Column Total | 11586 | 1646 | 13232 |
## | 0.876 | 0.124 | |
## ---------------|-----------|-----------|-----------|
##
##
# Two-way contingency table of FreeWifi (rows) by FreeBreakfast (columns),
# with counts, chi-square contributions and row / column / table proportions.
CrossTable(hot1$FreeWifi,hot1$FreeBreakfast)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 13232
##
##
## | hot1$FreeBreakfast
## hot1$FreeWifi | 0 | 1 | Row Total |
## --------------|-----------|-----------|-----------|
## 0 | 606 | 375 | 981 |
## | 199.074 | 107.614 | |
## | 0.618 | 0.382 | 0.074 |
## | 0.131 | 0.044 | |
## | 0.046 | 0.028 | |
## --------------|-----------|-----------|-----------|
## 1 | 4037 | 8214 | 12251 |
## | 15.941 | 8.617 | |
## | 0.330 | 0.670 | 0.926 |
## | 0.869 | 0.956 | |
## | 0.305 | 0.621 | |
## --------------|-----------|-----------|-----------|
## Column Total | 4643 | 8589 | 13232 |
## | 0.351 | 0.649 | |
## --------------|-----------|-----------|-----------|
##
##
# Two-way contingency table of FreeWifi (rows) by HasSwimmingPool (columns),
# with counts, chi-square contributions and row / column / table proportions.
CrossTable(hot1$FreeWifi,hot1$HasSwimmingPool)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 13232
##
##
## | hot1$HasSwimmingPool
## hot1$FreeWifi | 0 | 1 | Row Total |
## --------------|-----------|-----------|-----------|
## 0 | 592 | 389 | 981 |
## | 2.526 | 4.574 | |
## | 0.603 | 0.397 | 0.074 |
## | 0.069 | 0.083 | |
## | 0.045 | 0.029 | |
## --------------|-----------|-----------|-----------|
## 1 | 7932 | 4319 | 12251 |
## | 0.202 | 0.366 | |
## | 0.647 | 0.353 | 0.926 |
## | 0.931 | 0.917 | |
## | 0.599 | 0.326 | |
## --------------|-----------|-----------|-----------|
## Column Total | 8524 | 4708 | 13232 |
## | 0.644 | 0.356 | |
## --------------|-----------|-----------|-----------|
##
##
# Horizontal box plots of the continuous hotel attributes, one plot per
# variable, to show spread and outliers.

# Room rent. The title used to read "Room Rent Rating", which mislabelled the
# variable being plotted (RoomRent, not a rating).
boxplot(hot1$RoomRent,
        horizontal = TRUE,
        xlab = "Room Rent of the hotel",
        main = "Box plot of Room Rent of hotel")

# Star rating.
boxplot(hot1$StarRating,
        xlab = "Star Rating of the hotel",
        main = "Box plot of Star Rating of hotel",
        horizontal = TRUE)

# Distance to the closest major airport (axis label states km).
boxplot(hot1$Airport,
        xlab = "Distance between Hotel and closest major Airport(in km)",
        main = "Box plot of Airport Distance of hotel",
        horizontal = TRUE)

# Hotel capacity.
boxplot(hot1$HotelCapacity,
        xlab = "Hotel Capacity",
        main = "Box plot of Hotel Capacity",
        horizontal = TRUE)
## Histograms
# Count histograms (lattice) of star rating, hotel capacity and airport
# distance. `nint` sets the number of bins. The plot titles previously
# misspelled "Distribution" as "Distrubtion".
library(lattice)
histogram(~ StarRating,
          data = hot1,
          type = "count",
          nint = 7,
          xlab = "Star Rating",
          main = "Distribution of Star Ratings of hotels")
histogram(~ HotelCapacity,
          data = hot1,
          type = "count",
          nint = 12,
          xlab = "Hotel Capacity",
          main = "Distribution of capacity of Hotels")
histogram(~ Airport,
          data = hot1,
          type = "count",
          nint = 12,
          xlab = "Distance from Airport",
          main = "Distribution of distance to the nearest major airport")
library(corrgram)
library(ellipse)
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
##
## pairs
# Correlogram of the hotel data: shaded cells below the diagonal, pie
# glyphs above it, variable names on the diagonal; variables keep their
# original column order.
corrgram(
  hot1,
  order = FALSE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Hotel Data"
)
## Scatter plot matrix
# Pairwise scatter plots of RoomRent, IsWeekend and IsNewYearEve, drawn with
# solid points (pch = 16).
# NOTE(review): attaching 'ellipse' masks graphics::pairs (see the attach
# message above), so this call goes through ellipse's `pairs` first --
# confirm it still forwards the formula/data arguments as intended.
pairs(formula = ~ RoomRent + IsWeekend + IsNewYearEve, data = hot1, pch = 16)
Ho: There is no significant difference between the Room Rent of hotels with a swimming pool and hotels without a swimming pool.
H1: There is a significant difference between the Room Rent of hotels with a swimming pool and hotels without a swimming pool.
# Two-sided Welch t-test on mean RoomRent: hotels without a swimming pool (x)
# vs. hotels with one (y).
t.test(hot1$RoomRent[hot1$HasSwimmingPool==0],hot1$RoomRent[hot1$HasSwimmingPool==1])
##
## Welch Two Sample t-test
##
## data: hot1$RoomRent[hot1$HasSwimmingPool == 0] and hot1$RoomRent[hot1$HasSwimmingPool == 1]
## t = -29.013, df = 5011.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5096.030 -4450.942
## sample estimates:
## mean of x mean of y
## 3775.566 8549.052
Inference: Since p-value < 0.05, we reject Ho in favour of H1; hence, there is a significant difference between the Room Rent of hotels with a swimming pool and hotels without a swimming pool.
#### One-tailed t-test
Ho: There is no significant difference between the Room Rent of hotels with a swimming pool and hotels without a swimming pool.
H1: The Room Rent of hotels with a swimming pool is greater than the Room Rent of hotels without a swimming pool.
# One-sided Welch t-test: alternative = "less" tests whether mean RoomRent
# without a pool (x) is lower than mean RoomRent with a pool (y).
t.test(hot1$RoomRent[hot1$HasSwimmingPool==0],hot1$RoomRent[hot1$HasSwimmingPool==1],alternative = "less")
##
## Welch Two Sample t-test
##
## data: hot1$RoomRent[hot1$HasSwimmingPool == 0] and hot1$RoomRent[hot1$HasSwimmingPool == 1]
## t = -29.013, df = 5011.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -4502.814
## sample estimates:
## mean of x mean of y
## 3775.566 8549.052
Inference: Since p-value < 0.05, we reject Ho in favour of H1; hence, the Room Rent of hotels with a swimming pool is greater than the Room Rent of hotels without a swimming pool.
Ho: There is no significant difference between the Room Rent of hotels providing free wifi and those which do not.
H1: There is a significant difference between the Room Rent of hotels providing free wifi and those which do not.
# Two-sided Welch t-test on mean RoomRent: hotels without free wifi (x)
# vs. hotels with free wifi (y).
t.test(hot1$RoomRent[hot1$FreeWifi==0],hot1$RoomRent[hot1$FreeWifi==1])
##
## Welch Two Sample t-test
##
## data: hot1$RoomRent[hot1$FreeWifi == 0] and hot1$RoomRent[hot1$FreeWifi == 1]
## t = -0.76847, df = 1804.7, p-value = 0.4423
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -360.5977 157.5701
## sample estimates:
## mean of x mean of y
## 5380.004 5481.518
Inference: Since p-value > 0.05, we fail to reject Ho; hence, there is no statistically significant difference between the Room Rent of hotels providing free wifi and those which do not.
Ho: There is no significant difference between the Room Rent of hotels with free breakfast and hotels without free breakfast.
H1: There is a significant difference between the Room Rent of hotels with free breakfast and hotels without free breakfast.
# Two-sided Welch t-test on mean RoomRent: hotels without free breakfast (x)
# vs. hotels with free breakfast (y).
t.test(hot1$RoomRent[hot1$FreeBreakfast==0],hot1$RoomRent[hot1$FreeBreakfast==1])
##
## Welch Two Sample t-test
##
## data: hot1$RoomRent[hot1$FreeBreakfast == 0] and hot1$RoomRent[hot1$FreeBreakfast == 1]
## t = 0.98095, df = 6212.3, p-value = 0.3267
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -153.5017 460.9935
## sample estimates:
## mean of x mean of y
## 5573.790 5420.044
Inference: Since p-value > 0.05, we fail to reject Ho; hence, there is no statistically significant difference between the Room Rent of hotels with free breakfast and hotels without free breakfast.
Ho: There is no significant difference between the Room Rent of hotels on normal days and on New Year’s Eve.
H1: The Room Rent of hotels on normal days is lower than on New Year’s Eve.
# One-sided Welch t-test: alternative = "less" tests whether mean RoomRent on
# rows with IsNewYearEve == 0 (x) is lower than on rows with IsNewYearEve == 1 (y).
t.test(hot1$RoomRent[hot1$IsNewYearEve==0],hot1$RoomRent[hot1$IsNewYearEve==1],alternative = "less")
##
## Welch Two Sample t-test
##
## data: hot1$RoomRent[hot1$IsNewYearEve == 0] and hot1$RoomRent[hot1$IsNewYearEve == 1]
## t = -4.1793, df = 2065, p-value = 1.523e-05
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -518.4763
## sample estimates:
## mean of x mean of y
## 5367.606 6222.826
Inference: Since p-value < 0.05, we reject Ho in favour of H1; hence, the Room Rent of hotels on normal days is lower than on New Year’s Eve.
Ho: There is no significant difference between the Room Rent of hotels on weekdays and weekends.
H1: There is a significant difference between the Room Rent of hotels on weekdays and weekends.
# Two-sided Welch t-test on mean RoomRent: rows with IsWeekend == 0 (x)
# vs. rows with IsWeekend == 1 (y).
t.test(hot1$RoomRent[hot1$IsWeekend==0],hot1$RoomRent[hot1$IsWeekend==1])
##
## Welch Two Sample t-test
##
## data: hot1$RoomRent[hot1$IsWeekend == 0] and hot1$RoomRent[hot1$IsWeekend == 1]
## t = -0.51853, df = 9999.4, p-value = 0.6041
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -331.2427 192.6559
## sample estimates:
## mean of x mean of y
## 5430.835 5500.129
Inference: Since p-value > 0.05, we fail to reject Ho; hence, there is no statistically significant difference between the Room Rent of hotels on weekdays and weekends.
Ho: There is no significant difference between the Room Rent of hotels in tourist destinations and non-tourist destinations.
H1: The Room Rent of hotels in tourist destinations is greater than that in non-tourist destinations.
# One-sided Welch t-test: alternative = "less" tests whether mean RoomRent in
# non-tourist destinations (x) is lower than in tourist destinations (y).
t.test(hot1$RoomRent[hot1$IsTouristDestination==0],hot1$RoomRent[hot1$IsTouristDestination==1],alternative = "less")
##
## Welch Two Sample t-test
##
## data: hot1$RoomRent[hot1$IsTouristDestination == 0] and hot1$RoomRent[hot1$IsTouristDestination == 1]
## t = -19.449, df = 12888, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -1789.665
## sample estimates:
## mean of x mean of y
## 4111.003 6066.024
Inference: Since p-value < 0.05, we reject Ho in favour of H1; hence, the Room Rent of hotels in tourist destinations is greater than that in non-tourist destinations.
Ho: There is no significant difference between the Room Rent of hotels in non-metro cities and metro cities.
H1: The Room Rent of hotels in non-metro cities is higher than that in metro cities.
# One-sided Welch t-test: alternative = "greater" tests whether mean RoomRent
# in non-metro cities (x) is higher than in metro cities (y).
t.test(hot1$RoomRent[hot1$IsMetroCity==0],hot1$RoomRent[hot1$IsMetroCity==1],alternative = "greater")
##
## Welch Two Sample t-test
##
## data: hot1$RoomRent[hot1$IsMetroCity == 0] and hot1$RoomRent[hot1$IsMetroCity == 1]
## t = 10.721, df = 13224, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 919.9785 Inf
## sample estimates:
## mean of x mean of y
## 5782.794 4696.073
Inference: Since p-value < 0.05, we reject Ho in favour of H1; hence, the Room Rent of hotels in non-metro cities is higher than that in metro cities.
1. HasSwimmingPool
2. IsNewYearEve
3. IsTouristDestination
4. IsMetroCity