READING AND DESCRIBING DATA

Read the Data

# Load the hotel listing CSV from the working directory.
Indianapolis.df <- read.csv(file = "2HIndianapolisData.csv")
# NOTE(review): attach() exposes the columns as they are at this point; later
# changes to Indianapolis.df are not reflected in the attached copy, so
# prefer Indianapolis.df$<column> over bare column names.
attach(Indianapolis.df)
# Rows and columns that were read.
dim(Indianapolis.df)
## [1] 500  19

Conversion of categorical data columns to factors

# Recode the categorical columns as factors in a single pass.
# GuestRating is left numeric (its conversion was commented out); append
# "GuestRating" to the vector below to convert it as well.
factor_cols <- c(
  "Day", "IsWeekend", "Available", "StarRating",
  "FreeWifi", "FreeBreakfast", "HasSwimmingPool"
)
Indianapolis.df[factor_cols] <- lapply(Indianapolis.df[factor_cols], as.factor)

Data Structure

# Compact overview of every column: its type and first few values.
utils::str(Indianapolis.df)
## 'data.frame':    500 obs. of  19 variables:
##  $ CityName        : Factor w/ 1 level "Indianapolis": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Population      : int  864771 864771 864771 864771 864771 864771 864771 864771 864771 864771 ...
##  $ IsTourist       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Day             : Factor w/ 7 levels "Friday","Monday",..: 7 5 1 3 4 2 6 7 5 1 ...
##  $ Date            : Factor w/ 10 levels "Dec 1 2017","Dec 2 2017",..: 9 10 1 2 3 4 5 6 7 8 ...
##  $ IsWeekend       : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 1 2 ...
##  $ HotelName       : Factor w/ 49 levels "Baymont Inn & Suites Indianapolis, Indianapolis",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Available       : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 2 2 2 2 ...
##  $ MaxRentUSD      : int  135 135 135 169 135 135 135 135 135 169 ...
##  $ RentUSD         : int  79 79 135 169 79 79 79 79 89 169 ...
##  $ StarRating      : Factor w/ 7 levels "2","2.5","3",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ GuestRating     : num  4.1 4.1 4.1 4.1 4.1 4.1 4.1 4.1 4.1 4.1 ...
##  $ HotelAddress    : Factor w/ 49 levels "10 East Market Street, Indianapolis, IN, 46204, United States of America, 855-239-9477",..: 16 16 16 16 16 16 16 16 16 16 ...
##  $ HotelPincode    : int  46239 46239 46239 46239 46239 46239 46239 46239 46239 46239 ...
##  $ HotelDescription: Factor w/ 40 levels "3-star B&B, convenient to Bankers Life Fieldhouse",..: 37 37 37 37 37 37 37 37 37 37 ...
##  $ FreeWifi        : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ FreeBreakfast   : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
##  $ HotelCapacity   : int  75 75 75 75 75 75 75 75 75 75 ...
##  $ HasSwimmingPool : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...

Unique Hotel Names and Zip Codes

# Count distinct hotels and distinct zip codes: every value that is not a
# repeat of an earlier one contributes 1 to the sum.
sum(!duplicated(Indianapolis.df[["HotelName"]]))
## [1] 49
sum(!duplicated(Indianapolis.df[["HotelPincode"]]))
## [1] 15

Descriptive statistics

library(psych)
# Summary statistics for the Indianapolis hotel data; only the first ten
# columns of the describe() table (vars through range) are shown.
describe(Indianapolis.df)[, seq_len(10)]
##                   vars   n      mean     sd   median   trimmed   mad
## CityName*            1 500      1.00   0.00      1.0      1.00  0.00
## Population           2 500 864771.00   0.00 864771.0 864771.00  0.00
## IsTourist            3 500      0.00   0.00      0.0      0.00  0.00
## Day*                 4 500      4.10   2.17      4.5      4.12  2.97
## Date*                5 500      5.50   2.88      5.5      5.50  3.71
## IsWeekend*           6 500      1.30   0.46      1.0      1.25  0.00
## HotelName*           7 500     24.88  14.04     24.5     24.85 17.79
## Available*           8 500      1.65   0.48      2.0      1.69  0.00
## MaxRentUSD           9 500    213.92  86.73    195.0    206.50 83.03
## RentUSD             10 500    182.15  87.13    161.0    170.39 66.72
## StarRating*         11 500      3.15   1.13      3.0      3.08  1.48
## GuestRating         12 500      4.24   0.33      4.2      4.26  0.30
## HotelAddress*       13 500     25.22  14.10     25.5     25.27 17.79
## HotelPincode        14 500  46216.32  20.52  46211.5  46215.05 14.08
## HotelDescription*   15 500     20.34  11.82     19.5     20.38 14.08
## FreeWifi*           16 500      1.96   0.20      2.0      2.00  0.00
## FreeBreakfast*      17 500      1.62   0.49      2.0      1.65  0.00
## HotelCapacity       18 500    187.16 182.40    127.0    157.35 87.47
## HasSwimmingPool*    19 500      1.64   0.48      2.0      1.68  0.00
##                        min      max  range
## CityName*              1.0      1.0    0.0
## Population        864771.0 864771.0    0.0
## IsTourist              0.0      0.0    0.0
## Day*                   1.0      7.0    6.0
## Date*                  1.0     10.0    9.0
## IsWeekend*             1.0      2.0    1.0
## HotelName*             1.0     49.0   48.0
## Available*             1.0      2.0    1.0
## MaxRentUSD            65.0    529.0  464.0
## RentUSD               55.0    529.0  474.0
## StarRating*            1.0      7.0    6.0
## GuestRating            3.4      4.8    1.4
## HotelAddress*          1.0     49.0   48.0
## HotelPincode       46143.0  46290.0  147.0
## HotelDescription*      1.0     40.0   39.0
## FreeWifi*              1.0      2.0    1.0
## FreeBreakfast*         1.0      2.0    1.0
## HotelCapacity          3.0   1005.0 1002.0
## HasSwimmingPool*       1.0      2.0    1.0