Descriptive Statistics -- Hotels

Sameer Mathur

BASIC DATA SUMMARY

Read the data

# Load the hotel listings from the working directory.
# stringsAsFactors = TRUE keeps text columns as factors on every R version
# (the default became FALSE in R 4.0), which is what the str() and describe()
# output below relies on.
AllCities.df <- read.csv("AllCitiesData.csv", stringsAsFactors = TRUE)
# NOTE(review): attach() is kept only because later snippets refer to columns
# by bare name (CityName, RoomRent); prefer AllCities.df$<column> in new code.
attach(AllCities.df)
# Rows and columns of the loaded data.
dim(AllCities.df)
[1] 6467   18

Data Structure

# Compact overview: one line per column with its type and first few values.
utils::str(AllCities.df)
'data.frame':   6467 obs. of  18 variables:
 $ CityName            : Factor w/ 25 levels "Agra","Amritsar",..: 25 25 25 25 25 25 25 25 25 10 ...
 $ Population          : int  1201815 1201815 1201815 1201815 1201815 1201815 1201815 1201815 1201815 1637875 ...
 $ CityRank            : int  15 15 15 15 15 15 15 15 15 17 ...
 $ IsTouristDestination: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
 $ IsWeekend           : Factor w/ 2 levels "No","Yes": 2 1 2 1 2 2 1 2 2 1 ...
 $ IsNewYearEve        : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ Date                : Factor w/ 8 levels "Dec 18 2016",..: 1 2 4 7 8 1 2 4 8 2 ...
 $ HotelName           : Factor w/ 813 levels "14 Square Amanora",..: 366 366 366 366 366 654 654 654 654 767 ...
 $ RoomRent            : int  75000 75000 75000 75000 75000 75000 75000 75000 75000 71900 ...
 $ StarRating          : int  5 5 5 5 5 4 4 4 4 5 ...
 $ Airport             : num  19 19 19 19 19 19 19 19 19 1.1 ...
 $ HotelAddress        : Factor w/ 1021 levels "#33, The Mall,Sona Chandi Towers,Amritsar,,India",..: 681 681 681 681 681 680 680 680 680 700 ...
 $ HotelPincode        : int  221002 221002 221002 221002 221002 221002 221002 221002 221002 342006 ...
 $ HotelDescription    : Factor w/ 596 levels "10 star hotel near Queensroad, Amritsar",..: 502 502 502 502 502 502 502 502 502 464 ...
 $ FreeWifi            : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
 $ FreeBreakfast       : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 1 1 1 1 ...
 $ HotelCapacity       : int  10 10 10 10 10 10 10 10 10 64 ...
 $ HasSwimmingPool     : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...

Unique cities (25 in total)

# Distinct city names, in order of first appearance in the data.
# The column is read from the data frame explicitly so the result does not
# depend on attach() or on a global variable that happens to share the name.
unique(AllCities.df$CityName)
 [1] Varanasi         Jodhpur          Jaipur           Agra            
 [5] Udaipur          Thiruvanthipuram Kochi            Goa             
 [9] Chandigarh       Jaisalmer        Rajkot           Srinagar        
[13] Madurai          Lucknow          Rishikesh        Bhubaneswar     
[17] Manali           Guwahati         Amritsar         Pune            
[21] Indore           Panchkula        Surat            Mangalore       
[25] Kanpur          
25 Levels: Agra Amritsar Bhubaneswar Chandigarh Goa Guwahati ... Varanasi

Descriptive statistics using describe()

# psych supplies describe(): per-column summary statistics.
library(psych)
# Rows whose name ends in * are factor columns summarised through their
# integer codes, so their mean/sd/skew are not meaningful quantities.
psych::describe(AllCities.df)
                      vars    n       mean         sd  median    trimmed
CityName*                1 6467      12.29       7.43      12      12.12
Population               2 6467 1515470.43 1083108.26 1457723 1477339.25
CityRank                 3 6467      16.03       9.32      15      15.20
IsTouristDestination*    4 6467       1.66       0.47       2       1.71
IsWeekend*               5 6467       1.63       0.48       2       1.66
IsNewYearEve*            6 6467       1.13       0.33       1       1.03
Date*                    7 6467       4.50       2.29       4       4.50
HotelName*               8 6467     407.88     236.05     406     408.14
RoomRent                 9 6467    6024.90    7443.75    4000    4507.31
StarRating              10 6467       3.42       0.82       3       3.42
Airport                 11 6467      15.17      11.51      12      13.78
HotelAddress*           12 6467     588.73     272.41     614     603.72
HotelPincode            13 6467  388606.33  175832.84  342802  377435.39
HotelDescription*       14 6467     289.06     175.55     284     286.86
FreeWifi*               15 6467       1.92       0.28       2       2.00
FreeBreakfast*          16 6467       1.61       0.49       2       1.64
HotelCapacity           17 6467      49.55      52.68      30      38.89
HasSwimmingPool*        18 6467       1.40       0.49       1       1.38
                             mad      min     max     range  skew kurtosis
CityName*                   8.90      1.0      25      24.0  0.25    -1.17
Population            1278220.62   8096.0 4467797 4459701.0  0.51    -0.82
CityRank                   11.86      3.0      37      34.0  0.52    -0.49
IsTouristDestination*       0.00      1.0       2       1.0 -0.70    -1.51
IsWeekend*                  0.00      1.0       2       1.0 -0.52    -1.73
IsNewYearEve*               0.00      1.0       2       1.0  2.27     3.13
Date*                       2.97      1.0       8       7.0  0.00    -1.24
HotelName*                306.90      1.0     813     812.0  0.00    -1.21
RoomRent                 2596.03    299.0   75000   74701.0  4.65    28.66
StarRating                  1.48      0.0       5       5.0  0.01     0.20
Airport                    10.82      0.2      61      60.8  1.08     1.00
HotelAddress*             295.04      1.0    1021    1020.0 -0.38    -0.76
HotelPincode           101131.11 134109.0  781022  646913.0  0.71    -0.57
HotelDescription*         234.25      1.0     596     595.0  0.08    -1.28
FreeWifi*                   0.00      1.0       2       1.0 -3.00     7.02
FreeBreakfast*              0.00      1.0       2       1.0 -0.46    -1.79
HotelCapacity              22.24      1.0     414     413.0  2.35     7.19
HasSwimmingPool*            0.00      1.0       2       1.0  0.40    -1.84
                            se
CityName*                 0.09
Population            13468.54
CityRank                  0.12
IsTouristDestination*     0.01
IsWeekend*                0.01
IsNewYearEve*             0.00
Date*                     0.03
HotelName*                2.94
RoomRent                 92.56
StarRating                0.01
Airport                   0.14
HotelAddress*             3.39
HotelPincode           2186.50
HotelDescription*         2.18
FreeWifi*                 0.00
FreeBreakfast*            0.01
HotelCapacity             0.66
HasSwimmingPool*          0.01

Descriptive statistics using describe() -- selected columns

library(psych)
# Keep only the index, count, mean, standard deviation and extremes of each
# variable, picking the statistics by name rather than by column position.
describe(AllCities.df)[, c("vars", "n", "mean", "sd", "min", "max")]
                      vars    n       mean         sd      min     max
CityName*                1 6467      12.29       7.43      1.0      25
Population               2 6467 1515470.43 1083108.26   8096.0 4467797
CityRank                 3 6467      16.03       9.32      3.0      37
IsTouristDestination*    4 6467       1.66       0.47      1.0       2
IsWeekend*               5 6467       1.63       0.48      1.0       2
IsNewYearEve*            6 6467       1.13       0.33      1.0       2
Date*                    7 6467       4.50       2.29      1.0       8
HotelName*               8 6467     407.88     236.05      1.0     813
RoomRent                 9 6467    6024.90    7443.75    299.0   75000
StarRating              10 6467       3.42       0.82      0.0       5
Airport                 11 6467      15.17      11.51      0.2      61
HotelAddress*           12 6467     588.73     272.41      1.0    1021
HotelPincode            13 6467  388606.33  175832.84 134109.0  781022
HotelDescription*       14 6467     289.06     175.55      1.0     596
FreeWifi*               15 6467       1.92       0.28      1.0       2
FreeBreakfast*          16 6467       1.61       0.49      1.0       2
HotelCapacity           17 6467      49.55      52.68      1.0     414
HasSwimmingPool*        18 6467       1.40       0.49      1.0       2

Median hotel room rent (RoomRent)

# Median of the RoomRent column across all rows.
# The column is taken from the data frame explicitly so the result does not
# depend on attach() or on a global variable named RoomRent.
median(AllCities.df$RoomRent)
[1] 4000