Sameer Mathur
# Load the hotel data set from the current working directory.
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, whereas the str()/unique() output
# recorded in this transcript shows those columns as factors.
AllCities.df <- read.csv("AllCitiesData.csv", stringsAsFactors = TRUE)
# NOTE(review): attach() is retained so that any later statement referring to
# a column by its bare name keeps working; prefer AllCities.df$<column> in
# new code, since attached copies can be masked by same-named globals.
attach(AllCities.df)
# Number of rows and columns.
dim(AllCities.df)
[1] 6467 18
# Compact overview of the data frame: each column's type and first values.
str(AllCities.df)
'data.frame': 6467 obs. of 18 variables:
$ CityName : Factor w/ 25 levels "Agra","Amritsar",..: 25 25 25 25 25 25 25 25 25 10 ...
$ Population : int 1201815 1201815 1201815 1201815 1201815 1201815 1201815 1201815 1201815 1637875 ...
$ CityRank : int 15 15 15 15 15 15 15 15 15 17 ...
$ IsTouristDestination: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
$ IsWeekend : Factor w/ 2 levels "No","Yes": 2 1 2 1 2 2 1 2 2 1 ...
$ IsNewYearEve : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
$ Date : Factor w/ 8 levels "Dec 18 2016",..: 1 2 4 7 8 1 2 4 8 2 ...
$ HotelName : Factor w/ 813 levels "14 Square Amanora",..: 366 366 366 366 366 654 654 654 654 767 ...
$ RoomRent : int 75000 75000 75000 75000 75000 75000 75000 75000 75000 71900 ...
$ StarRating : int 5 5 5 5 5 4 4 4 4 5 ...
$ Airport : num 19 19 19 19 19 19 19 19 19 1.1 ...
$ HotelAddress : Factor w/ 1021 levels "#33, The Mall,Sona Chandi Towers,Amritsar,,India",..: 681 681 681 681 681 680 680 680 680 700 ...
$ HotelPincode : int 221002 221002 221002 221002 221002 221002 221002 221002 221002 342006 ...
$ HotelDescription : Factor w/ 596 levels "10 star hotel near Queensroad, Amritsar",..: 502 502 502 502 502 502 502 502 502 464 ...
$ FreeWifi : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
$ FreeBreakfast : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 1 1 1 1 ...
$ HotelCapacity : int 10 10 10 10 10 10 10 10 10 64 ...
$ HasSwimmingPool : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
# Distinct cities in the data, in order of first appearance. The column is
# referenced through the data frame so the result does not depend on
# attach() or on a same-named object in the global environment.
unique(AllCities.df$CityName)
[1] Varanasi Jodhpur Jaipur Agra
[5] Udaipur Thiruvanthipuram Kochi Goa
[9] Chandigarh Jaisalmer Rajkot Srinagar
[13] Madurai Lucknow Rishikesh Bhubaneswar
[17] Manali Guwahati Amritsar Pune
[21] Indore Panchkula Surat Mangalore
[25] Kanpur
25 Levels: Agra Amritsar Bhubaneswar Chandigarh Goa Guwahati ... Varanasi
# psych provides describe(), used here for per-column summary statistics.
library(psych)
# Summary statistics for every column. In the printed output, names marked
# with * are non-numeric columns summarised by their integer level codes.
describe(AllCities.df)
vars n mean sd median trimmed
CityName* 1 6467 12.29 7.43 12 12.12
Population 2 6467 1515470.43 1083108.26 1457723 1477339.25
CityRank 3 6467 16.03 9.32 15 15.20
IsTouristDestination* 4 6467 1.66 0.47 2 1.71
IsWeekend* 5 6467 1.63 0.48 2 1.66
IsNewYearEve* 6 6467 1.13 0.33 1 1.03
Date* 7 6467 4.50 2.29 4 4.50
HotelName* 8 6467 407.88 236.05 406 408.14
RoomRent 9 6467 6024.90 7443.75 4000 4507.31
StarRating 10 6467 3.42 0.82 3 3.42
Airport 11 6467 15.17 11.51 12 13.78
HotelAddress* 12 6467 588.73 272.41 614 603.72
HotelPincode 13 6467 388606.33 175832.84 342802 377435.39
HotelDescription* 14 6467 289.06 175.55 284 286.86
FreeWifi* 15 6467 1.92 0.28 2 2.00
FreeBreakfast* 16 6467 1.61 0.49 2 1.64
HotelCapacity 17 6467 49.55 52.68 30 38.89
HasSwimmingPool* 18 6467 1.40 0.49 1 1.38
mad min max range skew kurtosis
CityName* 8.90 1.0 25 24.0 0.25 -1.17
Population 1278220.62 8096.0 4467797 4459701.0 0.51 -0.82
CityRank 11.86 3.0 37 34.0 0.52 -0.49
IsTouristDestination* 0.00 1.0 2 1.0 -0.70 -1.51
IsWeekend* 0.00 1.0 2 1.0 -0.52 -1.73
IsNewYearEve* 0.00 1.0 2 1.0 2.27 3.13
Date* 2.97 1.0 8 7.0 0.00 -1.24
HotelName* 306.90 1.0 813 812.0 0.00 -1.21
RoomRent 2596.03 299.0 75000 74701.0 4.65 28.66
StarRating 1.48 0.0 5 5.0 0.01 0.20
Airport 10.82 0.2 61 60.8 1.08 1.00
HotelAddress* 295.04 1.0 1021 1020.0 -0.38 -0.76
HotelPincode 101131.11 134109.0 781022 646913.0 0.71 -0.57
HotelDescription* 234.25 1.0 596 595.0 0.08 -1.28
FreeWifi* 0.00 1.0 2 1.0 -3.00 7.02
FreeBreakfast* 0.00 1.0 2 1.0 -0.46 -1.79
HotelCapacity 22.24 1.0 414 413.0 2.35 7.19
HasSwimmingPool* 0.00 1.0 2 1.0 0.40 -1.84
se
CityName* 0.09
Population 13468.54
CityRank 0.12
IsTouristDestination* 0.01
IsWeekend* 0.01
IsNewYearEve* 0.00
Date* 0.03
HotelName* 2.94
RoomRent 92.56
StarRating 0.01
Airport 0.14
HotelAddress* 3.39
HotelPincode 2186.50
HotelDescription* 2.18
FreeWifi* 0.00
FreeBreakfast* 0.01
HotelCapacity 0.66
HasSwimmingPool* 0.01
library(psych)
# Same summary restricted to a few statistics. Columns are picked by name
# rather than by position so the selection is self-explanatory and does not
# depend on the column order of describe()'s result.
describe(AllCities.df)[, c("vars", "n", "mean", "sd", "min", "max")]
vars n mean sd min max
CityName* 1 6467 12.29 7.43 1.0 25
Population 2 6467 1515470.43 1083108.26 8096.0 4467797
CityRank 3 6467 16.03 9.32 3.0 37
IsTouristDestination* 4 6467 1.66 0.47 1.0 2
IsWeekend* 5 6467 1.63 0.48 1.0 2
IsNewYearEve* 6 6467 1.13 0.33 1.0 2
Date* 7 6467 4.50 2.29 1.0 8
HotelName* 8 6467 407.88 236.05 1.0 813
RoomRent 9 6467 6024.90 7443.75 299.0 75000
StarRating 10 6467 3.42 0.82 0.0 5
Airport 11 6467 15.17 11.51 0.2 61
HotelAddress* 12 6467 588.73 272.41 1.0 1021
HotelPincode 13 6467 388606.33 175832.84 134109.0 781022
HotelDescription* 14 6467 289.06 175.55 1.0 596
FreeWifi* 15 6467 1.92 0.28 1.0 2
FreeBreakfast* 16 6467 1.61 0.49 1.0 2
HotelCapacity 17 6467 49.55 52.68 1.0 414
HasSwimmingPool* 18 6467 1.40 0.49 1.0 2
# Median hotel room rent. The column is referenced through the data frame
# so the result does not depend on attach() or on a same-named global.
median(AllCities.df$RoomRent)
[1] 4000