# Load the hotel room-rent data set. The path is relative, so the CSV is
# looked up in the current working directory.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 as well;
# the describe()/summary() output recorded below (starred factor columns,
# per-level counts) depends on that.
Cities <- read.csv("Cities42.csv", stringsAsFactors = TRUE)

# Open the data viewer only when a user is present; it serves no purpose in
# batch or knitted runs.
if (interactive()) {
  View(Cities)
}

# NOTE(review): attach() is kept only for backward compatibility with code that
# may reference bare column names outside this section. Every statement in this
# section already names Cities explicitly, so prefer removing it once confirmed.
attach(Cities)
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# Per-column descriptive statistics (n, mean, sd, median, skew, kurtosis, ...).
psych::describe(Cities)
## vars n mean sd median trimmed
## CityName* 1 13232 18.07 11.72 16 17.29
## Population 2 13232 4416836.87 4258386.00 3046163 4040816.22
## CityRank 3 13232 14.83 13.51 9 13.30
## IsMetroCity 4 13232 0.28 0.45 0 0.23
## IsTouristDestination 5 13232 0.70 0.46 1 0.75
## IsWeekend 6 13232 0.62 0.48 1 0.65
## IsNewYearEve 7 13232 0.12 0.33 0 0.03
## Date* 8 13232 14.30 2.69 14 14.39
## HotelName* 9 13232 841.19 488.16 827 841.18
## RoomRent 10 13232 5473.99 7333.12 4000 4383.33
## StarRating 11 13232 3.46 0.76 3 3.40
## Airport 12 13232 21.16 22.76 15 16.39
## HotelAddress* 13 13232 1202.53 582.17 1261 1233.25
## HotelPincode 14 13232 397430.26 259837.50 395003 388540.47
## HotelDescription* 15 13224 581.34 363.26 567 575.37
## FreeWifi 16 13232 0.93 0.26 1 1.00
## FreeBreakfast 17 13232 0.65 0.48 1 0.69
## HotelCapacity 18 13232 62.51 76.66 34 46.03
## HasSwimmingPool 19 13232 0.36 0.48 0 0.32
## mad min max range skew
## CityName* 11.86 1.0 42 41.0 0.48
## Population 3846498.95 8096.0 12442373 12434277.0 0.68
## CityRank 11.86 0.0 44 44.0 0.69
## IsMetroCity 0.00 0.0 1 1.0 0.96
## IsTouristDestination 0.00 0.0 1 1.0 -0.86
## IsWeekend 0.00 0.0 1 1.0 -0.51
## IsNewYearEve 0.00 0.0 1 1.0 2.28
## Date* 2.97 1.0 20 19.0 -0.77
## HotelName* 641.97 1.0 1670 1669.0 0.01
## RoomRent 2653.85 299.0 322500 322201.0 16.75
## StarRating 0.74 0.0 5 5.0 0.48
## Airport 11.12 0.2 124 123.8 2.73
## HotelAddress* 668.65 1.0 2108 2107.0 -0.37
## HotelPincode 257975.37 100025.0 7000157 6900132.0 9.99
## HotelDescription* 472.95 1.0 1226 1225.0 0.11
## FreeWifi 0.00 0.0 1 1.0 -3.25
## FreeBreakfast 0.00 0.0 1 1.0 -0.62
## HotelCapacity 28.17 0.0 600 600.0 2.95
## HasSwimmingPool 0.00 0.0 1 1.0 0.60
## kurtosis se
## CityName* -0.88 0.10
## Population -1.08 37019.65
## CityRank -0.76 0.12
## IsMetroCity -1.08 0.00
## IsTouristDestination -1.26 0.00
## IsWeekend -1.74 0.00
## IsNewYearEve 3.18 0.00
## Date* 1.92 0.02
## HotelName* -1.25 4.24
## RoomRent 582.06 63.75
## StarRating 0.25 0.01
## Airport 7.89 0.20
## HotelAddress* -0.88 5.06
## HotelPincode 249.76 2258.86
## HotelDescription* -1.25 3.16
## FreeWifi 8.57 0.00
## FreeBreakfast -1.61 0.00
## HotelCapacity 11.39 0.67
## HasSwimmingPool -1.64 0.00
# Five-number summaries for numeric columns, level counts for factor columns.
summary(object = Cities)
## CityName Population CityRank IsMetroCity
## Delhi :2048 Min. : 8096 Min. : 0.00 Min. :0.0000
## Jaipur : 768 1st Qu.: 744983 1st Qu.: 2.00 1st Qu.:0.0000
## Mumbai : 712 Median : 3046163 Median : 9.00 Median :0.0000
## Bangalore: 656 Mean : 4416837 Mean :14.83 Mean :0.2842
## Goa : 624 3rd Qu.: 8443675 3rd Qu.:24.00 3rd Qu.:1.0000
## Kochi : 608 Max. :12442373 Max. :44.00 Max. :1.0000
## (Other) :7816
## IsTouristDestination IsWeekend IsNewYearEve Date
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Dec 21 2016:1611
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 Dec 24 2016:1611
## Median :1.0000 Median :1.0000 Median :0.0000 Dec 25 2016:1611
## Mean :0.6972 Mean :0.6228 Mean :0.1244 Dec 28 2016:1611
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 Dec 31 2016:1611
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Dec 18 2016:1608
## (Other) :3569
## HotelName RoomRent StarRating
## Vivanta by Taj : 32 Min. : 299 Min. :0.000
## Goldfinch Hotel : 24 1st Qu.: 2436 1st Qu.:3.000
## OYO Rooms : 24 Median : 4000 Median :3.000
## The Gordon House Hotel: 24 Mean : 5474 Mean :3.459
## Apnayt Villa : 16 3rd Qu.: 6299 3rd Qu.:4.000
## Bentleys Hotel Colaba : 16 Max. :322500 Max. :5.000
## (Other) :13096
## Airport
## Min. : 0.20
## 1st Qu.: 8.40
## Median : 15.00
## Mean : 21.16
## 3rd Qu.: 24.00
## Max. :124.00
##
## HotelAddress
## The Mall, Shimla : 32
## #2-91/14/8, White Fields, Kondapur, Hitech City, Hyderabad, 500084 India: 16
## 121, City Terrace, Walchand Hirachand Marg, Mumbai, Maharashtra : 16
## 14-4507/9, Balmatta Road, Near Jyothi Circle, Hampankatta : 16
## 144/7, Rajiv Gandi Salai (OMR), Kottivakkam, Chennai, Tamil Nadu : 16
## 17, Oliver Road, Colaba, Mumbai, Maharashtra : 16
## (Other) :13120
## HotelPincode HotelDescription FreeWifi FreeBreakfast
## Min. : 100025 3 : 120 Min. :0.0000 Min. :0.0000
## 1st Qu.: 221001 Abc : 112 1st Qu.:1.0000 1st Qu.:0.0000
## Median : 395003 3-star hotel: 104 Median :1.0000 Median :1.0000
## Mean : 397430 3.5 : 88 Mean :0.9259 Mean :0.6491
## 3rd Qu.: 570001 4 : 72 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :7000157 (Other) :12728 Max. :1.0000 Max. :1.0000
## NA's : 8
## HotelCapacity HasSwimmingPool
## Min. : 0.00 Min. :0.0000
## 1st Qu.: 16.00 1st Qu.:0.0000
## Median : 34.00 Median :0.0000
## Mean : 62.51 Mean :0.3558
## 3rd Qu.: 75.00 3rd Qu.:1.0000
## Max. :600.00 Max. :1.0000
##
# One-way frequency table: number of rows per city.
Table <- table(CityName = Cities$CityName)
Table
## CityName
## Agra Ahmedabad Amritsar Bangalore
## 432 424 136 656
## Bhubaneswar Chandigarh Chennai Darjeeling
## 120 336 416 136
## Delhi Gangtok Goa Guwahati
## 2048 128 624 48
## Haridwar Hyderabad Indore Jaipur
## 48 536 160 768
## Jaisalmer Jodhpur Kanpur Kochi
## 264 224 16 608
## Kolkata Lucknow Madurai Manali
## 512 128 112 288
## Mangalore Mumbai Munnar Mysore
## 104 712 328 160
## Nainital Ooty Panchkula Pune
## 144 136 64 600
## Puri Rajkot Rishikesh Shimla
## 56 128 88 280
## Srinagar Surat Thiruvanthipuram Thrissur
## 40 80 392 32
## Udaipur Varanasi
## 456 264
# One-way frequency table of the metro-city indicator (0/1).
Table1 <- table(IsMetroCity = Cities$IsMetroCity)
Table1
## IsMetroCity
## 0 1
## 9472 3760
# One-way frequency table of the tourist-destination indicator (0/1).
Table2 <- table(IsTouristDestination = Cities$IsTouristDestination)
Table2
## IsTouristDestination
## 0 1
## 4007 9225
# One-way frequency table of the weekend indicator (0/1).
Table3 <- table(IsWeekend = Cities$IsWeekend)
Table3
## IsWeekend
## 0 1
## 4991 8241
# One-way frequency table of the New Year's Eve indicator (0/1).
Table4 <- table(IsNewYearEve = Cities$IsNewYearEve)
Table4
## IsNewYearEve
## 0 1
## 11586 1646
# One-way frequency table of the swimming-pool indicator (0/1).
Table5 <- table(HasSwimmingPool = Cities$HasSwimmingPool)
Table5
## HasSwimmingPool
## 0 1
## 8524 4708
# Two-way contingency table: star rating (rows) by metro-city indicator (cols).
t1 <- xtabs(formula = ~ StarRating + IsMetroCity, data = Cities)
t1
## IsMetroCity
## StarRating 0 1
## 0 16 0
## 1 8 0
## 2 344 96
## 2.5 456 176
## 3 4336 1617
## 3.2 8 0
## 3.3 16 0
## 3.4 8 0
## 3.5 1312 440
## 3.6 0 8
## 3.7 24 0
## 3.8 16 0
## 3.9 32 0
## 4 1696 767
## 4.1 24 0
## 4.3 16 0
## 4.4 8 0
## 4.5 288 88
## 4.7 8 0
## 4.8 16 0
## 5 840 568
# Two-way contingency table: free Wi-Fi (rows) by free breakfast (cols).
t2 <- xtabs(formula = ~ FreeWifi + FreeBreakfast, data = Cities)
t2
## FreeBreakfast
## FreeWifi 0 1
## 0 606 375
## 1 4037 8214
# Two-way contingency table: tourist destination (rows) by New Year's Eve (cols).
t3 <- xtabs(formula = ~ IsTouristDestination + IsNewYearEve, data = Cities)
t3
## IsNewYearEve
## IsTouristDestination 0 1
## 0 3504 503
## 1 8082 1143
# Histograms of the main numeric variables, one plot per variable.
# Titles and axis labels are given explicitly, so evaluating the columns
# through with() does not change what is drawn.
with(Cities, {
  hist(Population, col = "blue", main = "Population", xlab = "Population")
  hist(StarRating, col = "blue", main = "Star rating", xlab = "Star rating")
  hist(
    Airport,
    col = "blue",
    main = "Distance to nearest major airport distribution",
    xlab = "Distance to nearest major Airport in km"
  )
  hist(
    HotelCapacity,
    col = "blue",
    main = "Capacity of hotels",
    xlab = "Hotel Capacity"
  )
  hist(
    CityRank,
    col = "blue",
    main = "Distribution of rank of cities",
    xlab = "City rank"
  )
})
# Horizontal box plot of the city rank distribution.
boxplot(Cities$CityRank, horizontal = TRUE, main = "city rank", col = "Red")

# Horizontal box plots of the two 0/1 indicator columns. Each plot shows a
# single variable, so each gets its own title instead of the shared
# copy-pasted "Metro city and Tourist destination" heading.
boxplot(
  Cities$IsMetroCity,
  horizontal = TRUE,
  main = "Metro city",
  col = "light green"
)
boxplot(
  Cities$IsTouristDestination,
  horizontal = TRUE,
  main = "Tourist destination",
  col = "light green"
)
#Corrgram of Cities
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Corrgram of the whole data frame, variables reordered, pies above the diagonal.
corrgram(Cities, upper.panel = panel.pie, order = TRUE)

# Pairwise correlations among rent, star rating, airport distance and
# capacity. x and y hold the same four columns, so cor(x, y) is the
# correlation matrix of that subset with itself.
x <- Cities[c("RoomRent", "StarRating", "Airport", "HotelCapacity")]
y <- x
cor(x, y)
## RoomRent StarRating Airport HotelCapacity
## RoomRent 1.00000000 0.36937343 0.04965324 0.1578733
## StarRating 0.36937343 1.00000000 -0.06091918 0.6374303
## Airport 0.04965324 -0.06091918 1.00000000 -0.1176721
## HotelCapacity 0.15787331 0.63743034 -0.11767207 1.0000000
# Corrgram restricted to the four variables used in the correlation matrix.
corrgram(
  Cities[, c("RoomRent", "StarRating", "Airport", "HotelCapacity")],
  upper.panel = panel.pie
)
library(car)
## Warning: package 'car' was built under R version 3.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatter plot of room rent against star rating, with a dashed smoother.
# R object names are case-sensitive: the data frame is `Cities`; the
# lower-case `cities` used before is not defined and the call would stop with
# "object 'cities' not found".
scatterplot(
  RoomRent ~ StarRating,
  data = Cities,
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Scatter plot of Star Rating vs Room rent",
  ylab = "Room Rent",
  xlab = "Star Rating"
)
library(car)
# Scatter plot of room rent against city rank, with a dashed smoother.
# R object names are case-sensitive: the data frame is `Cities`; the
# lower-case `cities` used before is not defined and the call would stop with
# "object 'cities' not found".
scatterplot(
  RoomRent ~ CityRank,
  data = Cities,
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Scatter plot of City rank vs Room rent",
  ylab = "Room Rent",
  xlab = "Rank of City"
)
library(car)
# Scatter plot of room rent against the swimming-pool indicator (0/1),
# with a dashed smoother.
scatterplot(
  RoomRent ~ HasSwimmingPool,
  data = Cities,
  main = "Scatter plot of hotel with Swimming pool vs Room rent",
  xlab = "Hotel with swimming pool",
  ylab = "Room Rent",
  spread = FALSE,
  smoother.args = list(lty = 2)
)
library(car)
# Scatter-plot matrix of room rent against the weekend and New Year's Eve
# indicators and airport distance, drawn with solid points (pch = 16).
scatterplotMatrix(
  ~ RoomRent + IsWeekend + IsNewYearEve + Airport,
  data = Cities,
  pch = 16
)
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
## Warning in smoother(x, y, col = col[2], log.x = FALSE, log.y = FALSE,
## spread = spread, : could not fit smooth
# Welch two-sample t-test: mean room rent on weekdays vs weekends.
# The defaults are spelled out so the test variant is visible in the code.
t.test(
  Cities$RoomRent ~ Cities$IsWeekend,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: Cities$RoomRent by Cities$IsWeekend
## t = -0.51853, df = 9999.4, p-value = 0.6041
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -331.2427 192.6559
## sample estimates:
## mean in group 0 mean in group 1
## 5430.835 5500.129
# Welch two-sample t-test: mean room rent on New Year's Eve vs other dates.
# The defaults are spelled out so the test variant is visible in the code.
t.test(
  Cities$RoomRent ~ Cities$IsNewYearEve,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: Cities$RoomRent by Cities$IsNewYearEve
## t = -4.1793, df = 2065, p-value = 3.046e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1256.5297 -453.9099
## sample estimates:
## mean in group 0 mean in group 1
## 5367.606 6222.826
# Welch two-sample t-test: mean room rent in non-metro vs metro cities.
# The defaults are spelled out so the test variant is visible in the code.
t.test(
  Cities$RoomRent ~ Cities$IsMetroCity,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: Cities$RoomRent by Cities$IsMetroCity
## t = 10.721, df = 13224, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 888.0308 1285.4102
## sample estimates:
## mean in group 0 mean in group 1
## 5782.794 4696.073
# Welch two-sample t-test: mean room rent without vs with free Wi-Fi.
# The defaults are spelled out so the test variant is visible in the code.
t.test(
  Cities$RoomRent ~ Cities$FreeWifi,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: Cities$RoomRent by Cities$FreeWifi
## t = -0.76847, df = 1804.7, p-value = 0.4423
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -360.5977 157.5701
## sample estimates:
## mean in group 0 mean in group 1
## 5380.004 5481.518
# Welch two-sample t-test: mean room rent without vs with free breakfast.
# The defaults are spelled out so the test variant is visible in the code.
t.test(
  Cities$RoomRent ~ Cities$FreeBreakfast,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: Cities$RoomRent by Cities$FreeBreakfast
## t = 0.98095, df = 6212.3, p-value = 0.3267
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -153.5017 460.9935
## sample estimates:
## mean in group 0 mean in group 1
## 5573.790 5420.044