READING AND DESCRIBING DATA
Read the Data
# Load the Indianapolis hotel listings from the CSV in the working directory.
# stringsAsFactors = TRUE is set explicitly so text columns are read as
# factors (as in the structure listing in this report) on every R version;
# R >= 4.0 would otherwise read them as character.
Indianapolis.df <- read.csv("IndianapolisData.csv", stringsAsFactors = TRUE)
# Number of rows and columns in the data frame
dim(Indianapolis.df)
[1] 500 24
Data Structure
# Show each column's type and its first few values.
# The data frame is deliberately not attach()ed: every model in this report
# passes data = Indianapolis.df, so no column needs to be on the search path,
# and attaching risks masking other objects with the same names.
str(Indianapolis.df)
'data.frame': 500 obs. of 24 variables:
$ CityName : Factor w/ 1 level "Indianapolis": 1 1 1 1 1 1 1 1 1 1 ...
$ Population : int 864771 864771 864771 864771 864771 864771 864771 864771 864771 864771 ...
$ IsTourist : int 0 0 0 0 0 0 0 0 0 0 ...
$ Day : Factor w/ 7 levels "Friday","Monday",..: 7 5 1 3 4 2 6 7 5 1 ...
$ Date : Factor w/ 10 levels "Dec 1 2017","Dec 2 2017",..: 9 10 1 2 3 4 5 6 7 8 ...
$ IsWeekend : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 1 2 ...
$ HotelName : Factor w/ 49 levels "Baymont Inn & Suites Indianapolis, Indianapolis",..: 29 29 29 29 29 29 29 29 29 29 ...
$ Available : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 2 2 2 2 ...
$ MaxRentUSD : int 84 84 119 119 84 90 84 90 90 97 ...
$ RentUSD : int 71 71 107 119 71 76 71 76 76 87 ...
$ StarRating : Factor w/ 7 levels "2","2.5","3",..: 2 2 2 2 2 2 2 2 2 2 ...
$ GuestRating : num 4.1 4.1 4.1 4.1 4.1 4.1 4.1 4.1 4.1 4.1 ...
$ HotelAddress : Factor w/ 49 levels "10 East Market Street, Indianapolis, IN, 46204, United States of America, 855-239-9477",..: 11 11 11 11 11 11 11 11 11 11 ...
$ HotelPincode : int 46143 46143 46143 46143 46143 46143 46143 46143 46143 46143 ...
$ HotelDescription : Factor w/ 40 levels "3-star B&B, convenient to Bankers Life Fieldhouse",..: 36 36 36 36 36 36 36 36 36 36 ...
$ FreeWifi : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ FreeBreakfast : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ HotelCapacity : int 81 81 81 81 81 81 81 81 81 81 ...
$ HasSwimmingPool : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ IsMarriott : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ IsHilton : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ IsMarriottOrHilton : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ MedianHomeValue : int 149000 149000 149000 149000 149000 149000 149000 149000 149000 149000 ...
$ MedianHouseHoldIncome: int 63265 63265 63265 63265 63265 63265 63265 63265 63265 63265 ...
Descriptive statistics
library(psych)
# Summary statistics for every column of the Indianapolis hotel data;
# only the first ten columns of psych's table (vars through range) are shown
describe(Indianapolis.df)[, seq_len(10)]
vars n mean sd median trimmed mad min max range
CityName* 1 500 1.00 0.00 1.0 1.00 0.00 1.0 1.0 0.0
Population 2 500 864771.00 0.00 864771.0 864771.00 0.00 864771.0 864771.0 0.0
IsTourist 3 500 0.00 0.00 0.0 0.00 0.00 0.0 0.0 0.0
Day* 4 500 4.10 2.17 4.5 4.12 2.97 1.0 7.0 6.0
Date* 5 500 5.50 2.88 5.5 5.50 3.71 1.0 10.0 9.0
IsWeekend* 6 500 1.30 0.46 1.0 1.25 0.00 1.0 2.0 1.0
HotelName* 7 500 24.88 14.04 24.5 24.85 17.79 1.0 49.0 48.0
Available* 8 500 1.65 0.48 2.0 1.69 0.00 1.0 2.0 1.0
MaxRentUSD 9 500 213.92 86.73 195.0 206.50 83.03 65.0 529.0 464.0
RentUSD 10 500 182.15 87.13 161.0 170.39 66.72 55.0 529.0 474.0
StarRating* 11 500 3.15 1.13 3.0 3.08 1.48 1.0 7.0 6.0
GuestRating 12 500 4.24 0.33 4.2 4.26 0.30 3.4 4.8 1.4
HotelAddress* 13 500 25.22 14.10 25.5 25.27 17.79 1.0 49.0 48.0
HotelPincode 14 500 46216.32 20.52 46211.5 46215.05 14.08 46143.0 46290.0 147.0
HotelDescription* 15 500 20.34 11.82 19.5 20.38 14.08 1.0 40.0 39.0
FreeWifi* 16 500 1.96 0.20 2.0 2.00 0.00 1.0 2.0 1.0
FreeBreakfast* 17 500 1.62 0.49 2.0 1.65 0.00 1.0 2.0 1.0
HotelCapacity 18 500 187.16 182.40 127.0 157.35 87.47 3.0 1005.0 1002.0
HasSwimmingPool* 19 500 1.64 0.48 2.0 1.68 0.00 1.0 2.0 1.0
IsMarriott* 20 500 1.20 0.40 1.0 1.12 0.00 1.0 2.0 1.0
IsHilton* 21 500 1.14 0.35 1.0 1.05 0.00 1.0 2.0 1.0
IsMarriottOrHilton* 22 500 1.34 0.47 1.0 1.30 0.00 1.0 2.0 1.0
MedianHomeValue 23 500 160134.00 93518.00 168900.0 154210.00 140624.61 58000.0 489500.0 431500.0
MedianHouseHoldIncome 24 500 38811.60 18926.22 34600.0 35494.05 7748.07 24548.0 151557.0 127009.0
Regression 0
# Regression 0: RentUSD explained by hotel capacity and the two brand flags
Model0 <- RentUSD ~
  HotelCapacity + IsMarriott + IsHilton
# Ordinary least squares fit on the full data frame
fit0 <- lm(formula = Model0, data = Indianapolis.df)
# Coefficient table, residual summary, and goodness-of-fit statistics
summary(fit0)
Call:
lm(formula = Model0, data = Indianapolis.df)
Residuals:
Min 1Q Median 3Q Max
-137.39 -53.39 -21.36 33.53 365.33
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 140.70085 5.03433 27.948 < 2e-16 ***
HotelCapacity 0.10988 0.02243 4.899 1.30e-06 ***
IsMarriott1 51.13125 10.41069 4.911 1.23e-06 ***
IsHilton1 76.12973 10.25612 7.423 5.02e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 76.36 on 496 degrees of freedom
Multiple R-squared: 0.2366, Adjusted R-squared: 0.232
F-statistic: 51.24 on 3 and 496 DF, p-value: < 2.2e-16
Regression 1
# Regression 1: RentUSD explained by the two median-value/income covariates
# and the two brand flags
Model1 <- RentUSD ~
  MedianHomeValue + MedianHouseHoldIncome +
  IsMarriott + IsHilton
# Ordinary least squares fit on the full data frame
fit1 <- lm(formula = Model1, data = Indianapolis.df)
# Coefficient table, residual summary, and goodness-of-fit statistics
summary(fit1)
Call:
lm(formula = Model1, data = Indianapolis.df)
Residuals:
Min 1Q Median 3Q Max
-127.82 -53.88 -17.82 25.65 353.44
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.620e+02 8.497e+00 19.061 < 2e-16 ***
MedianHomeValue 1.613e-04 5.919e-05 2.725 0.00665 **
MedianHouseHoldIncome -7.309e-04 2.682e-04 -2.726 0.00665 **
IsMarriott1 6.369e+01 1.041e+01 6.116 1.96e-09 ***
IsHilton1 7.126e+01 1.147e+01 6.213 1.10e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 77.59 on 495 degrees of freedom
Multiple R-squared: 0.2133, Adjusted R-squared: 0.207
F-statistic: 33.56 on 4 and 495 DF, p-value: < 2.2e-16
Regression 2
# Regression 2: RentUSD on the full set of candidate predictors.
# Stored as Model2/fit2 so the Regression 1 objects (Model1/fit1) are not
# overwritten and all fits stay available for comparison.
# NOTE(review): the coefficient table reports a StarRating level "359", which
# looks like a data-entry error - confirm and clean the source data.
Model2 <- RentUSD ~ MedianHomeValue + IsMarriott + IsHilton + HotelCapacity +
  HasSwimmingPool + FreeBreakfast + FreeWifi + StarRating + IsWeekend +
  Available + MedianHouseHoldIncome
fit2 <- lm(Model2, data = Indianapolis.df)
summary(fit2)
Call:
lm(formula = Model2, data = Indianapolis.df)
Residuals:
Min 1Q Median 3Q Max
-144.831 -34.074 -8.823 26.690 312.452
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.527e+01 2.372e+01 1.487 0.137592
MedianHomeValue -4.139e-05 5.312e-05 -0.779 0.436212
IsMarriott1 5.889e+01 1.065e+01 5.530 5.24e-08 ***
IsHilton1 7.681e+01 1.013e+01 7.584 1.74e-13 ***
HotelCapacity 4.585e-02 2.600e-02 1.764 0.078427 .
HasSwimmingPool1 -5.248e+00 6.622e+00 -0.793 0.428413
FreeBreakfast1 1.982e+01 8.039e+00 2.465 0.014042 *
FreeWifi1 1.001e+02 1.746e+01 5.732 1.75e-08 ***
StarRating2.5 6.820e+00 1.474e+01 0.463 0.643781
StarRating3 4.439e+01 1.461e+01 3.038 0.002514 **
StarRating3.5 5.236e+01 1.518e+01 3.449 0.000613 ***
StarRating4 1.427e+02 1.848e+01 7.722 6.66e-14 ***
StarRating4.5 6.585e+01 2.543e+01 2.589 0.009916 **
StarRating359 1.991e+02 6.188e+01 3.217 0.001383 **
IsWeekend1 3.172e+01 6.619e+00 4.792 2.20e-06 ***
Available1 -6.092e+01 6.611e+00 -9.216 < 2e-16 ***
MedianHouseHoldIncome 2.019e-05 2.344e-04 0.086 0.931400
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 58.99 on 483 degrees of freedom
Multiple R-squared: 0.5562, Adjusted R-squared: 0.5415
F-statistic: 37.84 on 16 and 483 DF, p-value: < 2.2e-16
Regression 3
# Regression 3: the Regression 2 predictor set with FreeWifi left out.
# NOTE(review): the coefficient table reports a StarRating level "359", which
# looks like a data-entry error - confirm and clean the source data.
Model3 <- RentUSD ~
  MedianHomeValue + IsMarriott + IsHilton + HotelCapacity +
  HasSwimmingPool + FreeBreakfast + StarRating + IsWeekend +
  Available + MedianHouseHoldIncome
# Ordinary least squares fit on the full data frame
fit3 <- lm(formula = Model3, data = Indianapolis.df)
# Coefficient table, residual summary, and goodness-of-fit statistics
summary(fit3)
Call:
lm(formula = Model3, data = Indianapolis.df)
Residuals:
Min 1Q Median 3Q Max
-146.27 -35.77 -10.09 26.38 340.89
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.379e+02 1.606e+01 8.586 < 2e-16 ***
MedianHomeValue -5.887e-05 5.475e-05 -1.075 0.282808
IsMarriott1 4.736e+01 1.080e+01 4.387 1.41e-05 ***
IsHilton1 6.309e+01 1.016e+01 6.209 1.15e-09 ***
HotelCapacity 6.945e-02 2.650e-02 2.620 0.009061 **
HasSwimmingPool1 -1.286e+01 6.698e+00 -1.920 0.055428 .
FreeBreakfast1 1.846e+01 8.296e+00 2.225 0.026546 *
StarRating2.5 1.072e+01 1.520e+01 0.705 0.481156
StarRating3 5.116e+01 1.504e+01 3.402 0.000724 ***
StarRating3.5 5.505e+01 1.567e+01 3.514 0.000483 ***
StarRating4 1.100e+02 1.814e+01 6.062 2.71e-09 ***
StarRating4.5 8.211e+01 2.609e+01 3.147 0.001753 **
StarRating359 1.935e+02 6.388e+01 3.029 0.002587 **
IsWeekend1 3.178e+01 6.833e+00 4.651 4.26e-06 ***
Available1 -6.075e+01 6.825e+00 -8.901 < 2e-16 ***
MedianHouseHoldIncome 5.114e-05 2.420e-04 0.211 0.832719
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 60.9 on 484 degrees of freedom
Multiple R-squared: 0.526, Adjusted R-squared: 0.5113
F-statistic: 35.81 on 15 and 484 DF, p-value: < 2.2e-16
LS0tDQp0aXRsZTogIkluZGlhbmFwb2xpcyBEYXRhIFN1bW1hcnkiDQphdXRob3I6ICJTYW1lZXIgTWF0aHVyIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KICAgDQojIFJFQURJTkcgQU5EIERFU0NSSUJJTkcgREFUQQ0KDQojIyBSZWFkIHRoZSBEYXRhDQpgYGB7ciByZWFkLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQ0KIyByZWFkaW5nIGRhdGENCkluZGlhbmFwb2xpcy5kZiA8LSByZWFkLmNzdihwYXN0ZSgiSW5kaWFuYXBvbGlzRGF0YS5jc3YiKSkgICAgICANCiMgZGltZW5zaW9uIG9mIHRoZSBkYXRhIGZyYW1lDQpkaW0oSW5kaWFuYXBvbGlzLmRmKSAgICANCmBgYA0KDQpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgZWNobz1GQUxTRX0NCkluZGlhbmFwb2xpcy5kZiREYXkgPC0gYXMuZmFjdG9yKEluZGlhbmFwb2xpcy5kZiREYXkpDQpJbmRpYW5hcG9saXMuZGYkSXNXZWVrZW5kIDwtIGFzLmZhY3RvcihJbmRpYW5hcG9saXMuZGYkSXNXZWVrZW5kKQ0KSW5kaWFuYXBvbGlzLmRmJEF2YWlsYWJsZSA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJEF2YWlsYWJsZSkNCkluZGlhbmFwb2xpcy5kZiRTdGFyUmF0aW5nIDwtIGFzLmZhY3RvcihJbmRpYW5hcG9saXMuZGYkU3RhclJhdGluZykNCkluZGlhbmFwb2xpcy5kZiRGcmVlV2lmaSA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJEZyZWVXaWZpKQ0KSW5kaWFuYXBvbGlzLmRmJEZyZWVCcmVha2Zhc3QgPC0gYXMuZmFjdG9yKEluZGlhbmFwb2xpcy5kZiRGcmVlQnJlYWtmYXN0KQ0KSW5kaWFuYXBvbGlzLmRmJEhhc1N3aW1taW5nUG9vbCA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJEhhc1N3aW1taW5nUG9vbCkNCkluZGlhbmFwb2xpcy5kZiRJc01hcnJpb3R0IDwtIGFzLmZhY3RvcihJbmRpYW5hcG9saXMuZGYkSXNNYXJyaW90dCkNCkluZGlhbmFwb2xpcy5kZiRJc0hpbHRvbiA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJElzSGlsdG9uKQ0KSW5kaWFuYXBvbGlzLmRmJElzTWFycmlvdHRPckhpbHRvbiA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJElzTWFycmlvdHRPckhpbHRvbikNCmBgYA0KDQojIyBEYXRhIFN0cnVjdHVyZQ0KYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQphdHRhY2goSW5kaWFuYXBvbGlzLmRmKQ0KIyBkYXRhIHN0cnVjdHVyZSBvZiB0aGUgZGF0YWZyYW1lDQpzdHIoSW5kaWFuYXBvbGlzLmRmKQ0KYGBgDQoNCiMjIERlc2NyaXB0aXZlIHN0YXRpc3RpY3MNCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeShwc3ljaCkNCiMgZGVzY3JpcHRpdmUgc3RhdGlzdGljcyBvZiBob3RlbHMgZm9yIGNpdHkgSW5kaWFuYXBvbGlzDQpkZXNjcmliZShJbmRpYW5hcG9saXMuZGYpWywgYygxOjEwKV0NCmBgYA0KDQojIyBSZWdyZXNzaW9uIDANCmBgYHtyfQ0KTW9kZWwwIDwtIFJlbnRVU0QgfiBIb3RlbENhcGFjaXR5ICsgSXNNYXJyaW90dCArIElz
SGlsdG9uIA0KZml0MCA8LSBsbShNb2RlbDAsIGRhdGEgPSBJbmRpYW5hcG9saXMuZGYpDQpzdW1tYXJ5KGZpdDApDQpgYGANCg0KIyMgUmVncmVzc2lvbiAxDQpgYGB7cn0NCk1vZGVsMSA8LSBSZW50VVNEIH4gTWVkaWFuSG9tZVZhbHVlICsgTWVkaWFuSG91c2VIb2xkSW5jb21lICsgSXNNYXJyaW90dCArIElzSGlsdG9uDQpmaXQxIDwtIGxtKE1vZGVsMSwgZGF0YSA9IEluZGlhbmFwb2xpcy5kZikNCnN1bW1hcnkoZml0MSkNCmBgYA0KDQojIyBSZWdyZXNzaW9uIDINCmBgYHtyfQ0KTW9kZWwxIDwtIFJlbnRVU0QgfiBNZWRpYW5Ib21lVmFsdWUgKyBJc01hcnJpb3R0ICsgSXNIaWx0b24gKyBIb3RlbENhcGFjaXR5ICsgSGFzU3dpbW1pbmdQb29sICsgRnJlZUJyZWFrZmFzdCArIEZyZWVXaWZpICsgU3RhclJhdGluZyArIElzV2Vla2VuZCArIEF2YWlsYWJsZSArIE1lZGlhbkhvdXNlSG9sZEluY29tZQ0KZml0MSA8LSBsbShNb2RlbDEsIGRhdGEgPSBJbmRpYW5hcG9saXMuZGYpDQpzdW1tYXJ5KGZpdDEpDQpgYGANCg0KIyMgUmVncmVzc2lvbiAzDQpgYGB7cn0NCk1vZGVsMyA8LSBSZW50VVNEIH4gTWVkaWFuSG9tZVZhbHVlICsgSXNNYXJyaW90dCArIElzSGlsdG9uICsgSG90ZWxDYXBhY2l0eSArIEhhc1N3aW1taW5nUG9vbCArIEZyZWVCcmVha2Zhc3QgKyBTdGFyUmF0aW5nICsgSXNXZWVrZW5kICsgQXZhaWxhYmxlICsgTWVkaWFuSG91c2VIb2xkSW5jb21lDQpmaXQzIDwtIGxtKE1vZGVsMywgZGF0YSA9IEluZGlhbmFwb2xpcy5kZikNCnN1bW1hcnkoZml0MykNCmBgYA0KDQo=