READING AND DESCRIBING DATA

Read the Data

# Read the Indianapolis hotel listings from the CSV (path is relative to the
# working directory).
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 too, which
# is what the str() output in this document reports (e.g. CityName, HotelName).
Indianapolis.df <- read.csv("IndianapolisData.csv", stringsAsFactors = TRUE)
# dimension of the data frame (rows, columns)
dim(Indianapolis.df)
[1] 500  24

Data Structure

# data structure of the dataframe
# (attach() removed: every model in this file passes `data = Indianapolis.df`
# explicitly, so placing the columns on the search path is unnecessary and
# only risks masking other objects)
str(Indianapolis.df)
'data.frame':   500 obs. of  24 variables:
 $ CityName             : Factor w/ 1 level "Indianapolis": 1 1 1 1 1 1 1 1 1 1 ...
 $ Population           : int  864771 864771 864771 864771 864771 864771 864771 864771 864771 864771 ...
 $ IsTourist            : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Day                  : Factor w/ 7 levels "Friday","Monday",..: 7 5 1 3 4 2 6 7 5 1 ...
 $ Date                 : Factor w/ 10 levels "Dec 1 2017","Dec 2 2017",..: 9 10 1 2 3 4 5 6 7 8 ...
 $ IsWeekend            : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 1 2 ...
 $ HotelName            : Factor w/ 49 levels "Baymont Inn & Suites Indianapolis, Indianapolis",..: 29 29 29 29 29 29 29 29 29 29 ...
 $ Available            : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 2 2 2 2 ...
 $ MaxRentUSD           : int  84 84 119 119 84 90 84 90 90 97 ...
 $ RentUSD              : int  71 71 107 119 71 76 71 76 76 87 ...
 $ StarRating           : Factor w/ 7 levels "2","2.5","3",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ GuestRating          : num  4.1 4.1 4.1 4.1 4.1 4.1 4.1 4.1 4.1 4.1 ...
 $ HotelAddress         : Factor w/ 49 levels "10 East Market Street, Indianapolis, IN, 46204, United States of America, 855-239-9477",..: 11 11 11 11 11 11 11 11 11 11 ...
 $ HotelPincode         : int  46143 46143 46143 46143 46143 46143 46143 46143 46143 46143 ...
 $ HotelDescription     : Factor w/ 40 levels "3-star B&B, convenient to Bankers Life Fieldhouse",..: 36 36 36 36 36 36 36 36 36 36 ...
 $ FreeWifi             : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ FreeBreakfast        : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ HotelCapacity        : int  81 81 81 81 81 81 81 81 81 81 ...
 $ HasSwimmingPool      : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ IsMarriott           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ IsHilton             : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ IsMarriottOrHilton   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ MedianHomeValue      : int  149000 149000 149000 149000 149000 149000 149000 149000 149000 149000 ...
 $ MedianHouseHoldIncome: int  63265 63265 63265 63265 63265 63265 63265 63265 63265 63265 ...

Descriptive statistics

# psych supplies describe(), used for the summary table
library(psych)
# summary statistics for every column of the Indianapolis hotel data;
# only the first ten columns of the describe() result are shown
describe(Indianapolis.df)[, seq_len(10)]
                      vars   n      mean       sd   median   trimmed       mad      min      max    range
CityName*                1 500      1.00     0.00      1.0      1.00      0.00      1.0      1.0      0.0
Population               2 500 864771.00     0.00 864771.0 864771.00      0.00 864771.0 864771.0      0.0
IsTourist                3 500      0.00     0.00      0.0      0.00      0.00      0.0      0.0      0.0
Day*                     4 500      4.10     2.17      4.5      4.12      2.97      1.0      7.0      6.0
Date*                    5 500      5.50     2.88      5.5      5.50      3.71      1.0     10.0      9.0
IsWeekend*               6 500      1.30     0.46      1.0      1.25      0.00      1.0      2.0      1.0
HotelName*               7 500     24.88    14.04     24.5     24.85     17.79      1.0     49.0     48.0
Available*               8 500      1.65     0.48      2.0      1.69      0.00      1.0      2.0      1.0
MaxRentUSD               9 500    213.92    86.73    195.0    206.50     83.03     65.0    529.0    464.0
RentUSD                 10 500    182.15    87.13    161.0    170.39     66.72     55.0    529.0    474.0
StarRating*             11 500      3.15     1.13      3.0      3.08      1.48      1.0      7.0      6.0
GuestRating             12 500      4.24     0.33      4.2      4.26      0.30      3.4      4.8      1.4
HotelAddress*           13 500     25.22    14.10     25.5     25.27     17.79      1.0     49.0     48.0
HotelPincode            14 500  46216.32    20.52  46211.5  46215.05     14.08  46143.0  46290.0    147.0
HotelDescription*       15 500     20.34    11.82     19.5     20.38     14.08      1.0     40.0     39.0
FreeWifi*               16 500      1.96     0.20      2.0      2.00      0.00      1.0      2.0      1.0
FreeBreakfast*          17 500      1.62     0.49      2.0      1.65      0.00      1.0      2.0      1.0
HotelCapacity           18 500    187.16   182.40    127.0    157.35     87.47      3.0   1005.0   1002.0
HasSwimmingPool*        19 500      1.64     0.48      2.0      1.68      0.00      1.0      2.0      1.0
IsMarriott*             20 500      1.20     0.40      1.0      1.12      0.00      1.0      2.0      1.0
IsHilton*               21 500      1.14     0.35      1.0      1.05      0.00      1.0      2.0      1.0
IsMarriottOrHilton*     22 500      1.34     0.47      1.0      1.30      0.00      1.0      2.0      1.0
MedianHomeValue         23 500 160134.00 93518.00 168900.0 154210.00 140624.61  58000.0 489500.0 431500.0
MedianHouseHoldIncome   24 500  38811.60 18926.22  34600.0  35494.05   7748.07  24548.0 151557.0 127009.0

Regression 0

# Regression 0: rent as a function of hotel capacity and the two chain
# indicators (Marriott, Hilton)
Model0 <- RentUSD ~ HotelCapacity + IsMarriott + IsHilton
# linear model on the Indianapolis data, followed by its coefficient summary
fit0 <- lm(formula = Model0, data = Indianapolis.df)
summary(fit0)

Call:
lm(formula = Model0, data = Indianapolis.df)

Residuals:
    Min      1Q  Median      3Q     Max 
-137.39  -53.39  -21.36   33.53  365.33 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)   140.70085    5.03433  27.948  < 2e-16 ***
HotelCapacity   0.10988    0.02243   4.899 1.30e-06 ***
IsMarriott1    51.13125   10.41069   4.911 1.23e-06 ***
IsHilton1      76.12973   10.25612   7.423 5.02e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 76.36 on 496 degrees of freedom
Multiple R-squared:  0.2366,    Adjusted R-squared:  0.232 
F-statistic: 51.24 on 3 and 496 DF,  p-value: < 2.2e-16

Regression 1

# Regression 1: rent as a function of the two neighbourhood medians (home
# value, household income) and the two chain indicators
Model1 <- RentUSD ~ MedianHomeValue + MedianHouseHoldIncome +
  IsMarriott + IsHilton
# linear model on the Indianapolis data, followed by its coefficient summary
fit1 <- lm(formula = Model1, data = Indianapolis.df)
summary(fit1)

Call:
lm(formula = Model1, data = Indianapolis.df)

Residuals:
    Min      1Q  Median      3Q     Max 
-127.82  -53.88  -17.82   25.65  353.44 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)            1.620e+02  8.497e+00  19.061  < 2e-16 ***
MedianHomeValue        1.613e-04  5.919e-05   2.725  0.00665 ** 
MedianHouseHoldIncome -7.309e-04  2.682e-04  -2.726  0.00665 ** 
IsMarriott1            6.369e+01  1.041e+01   6.116 1.96e-09 ***
IsHilton1              7.126e+01  1.147e+01   6.213 1.10e-09 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 77.59 on 495 degrees of freedom
Multiple R-squared:  0.2133,    Adjusted R-squared:  0.207 
F-statistic: 33.56 on 4 and 495 DF,  p-value: < 2.2e-16

Regression 2

# Regression 2: rent as a function of the neighbourhood medians, chain
# indicators, capacity, amenities (pool, breakfast, wifi), star rating,
# weekend flag and availability.
# Named Model2/fit2 so this chunk no longer overwrites the Regression 1
# objects (Model1/fit1), matching the Model0/Model3 numbering used elsewhere.
# NOTE(review): the coefficient table below lists a StarRating level "359",
# which looks like a data-entry error in the CSV -- confirm before interpreting.
Model2 <- RentUSD ~ MedianHomeValue + IsMarriott + IsHilton + HotelCapacity +
  HasSwimmingPool + FreeBreakfast + FreeWifi + StarRating + IsWeekend +
  Available + MedianHouseHoldIncome
fit2 <- lm(Model2, data = Indianapolis.df)
summary(fit2)

Call:
lm(formula = Model2, data = Indianapolis.df)

Residuals:
     Min       1Q   Median       3Q      Max 
-144.831  -34.074   -8.823   26.690  312.452 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)            3.527e+01  2.372e+01   1.487 0.137592    
MedianHomeValue       -4.139e-05  5.312e-05  -0.779 0.436212    
IsMarriott1            5.889e+01  1.065e+01   5.530 5.24e-08 ***
IsHilton1              7.681e+01  1.013e+01   7.584 1.74e-13 ***
HotelCapacity          4.585e-02  2.600e-02   1.764 0.078427 .  
HasSwimmingPool1      -5.248e+00  6.622e+00  -0.793 0.428413    
FreeBreakfast1         1.982e+01  8.039e+00   2.465 0.014042 *  
FreeWifi1              1.001e+02  1.746e+01   5.732 1.75e-08 ***
StarRating2.5          6.820e+00  1.474e+01   0.463 0.643781    
StarRating3            4.439e+01  1.461e+01   3.038 0.002514 ** 
StarRating3.5          5.236e+01  1.518e+01   3.449 0.000613 ***
StarRating4            1.427e+02  1.848e+01   7.722 6.66e-14 ***
StarRating4.5          6.585e+01  2.543e+01   2.589 0.009916 ** 
StarRating359          1.991e+02  6.188e+01   3.217 0.001383 ** 
IsWeekend1             3.172e+01  6.619e+00   4.792 2.20e-06 ***
Available1            -6.092e+01  6.611e+00  -9.216  < 2e-16 ***
MedianHouseHoldIncome  2.019e-05  2.344e-04   0.086 0.931400    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 58.99 on 483 degrees of freedom
Multiple R-squared:  0.5562,    Adjusted R-squared:  0.5415 
F-statistic: 37.84 on 16 and 483 DF,  p-value: < 2.2e-16

Regression 3

# Regression 3: the same predictors as the Regression 2 formula except that
# FreeWifi is left out.
# NOTE(review): the coefficient table below lists a StarRating level "359",
# which looks like a data-entry error in the CSV -- confirm before interpreting.
Model3 <- RentUSD ~ MedianHomeValue + IsMarriott + IsHilton + HotelCapacity +
  HasSwimmingPool + FreeBreakfast + StarRating + IsWeekend + Available +
  MedianHouseHoldIncome
# linear model on the Indianapolis data, followed by its coefficient summary
fit3 <- lm(formula = Model3, data = Indianapolis.df)
summary(fit3)

Call:
lm(formula = Model3, data = Indianapolis.df)

Residuals:
    Min      1Q  Median      3Q     Max 
-146.27  -35.77  -10.09   26.38  340.89 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)            1.379e+02  1.606e+01   8.586  < 2e-16 ***
MedianHomeValue       -5.887e-05  5.475e-05  -1.075 0.282808    
IsMarriott1            4.736e+01  1.080e+01   4.387 1.41e-05 ***
IsHilton1              6.309e+01  1.016e+01   6.209 1.15e-09 ***
HotelCapacity          6.945e-02  2.650e-02   2.620 0.009061 ** 
HasSwimmingPool1      -1.286e+01  6.698e+00  -1.920 0.055428 .  
FreeBreakfast1         1.846e+01  8.296e+00   2.225 0.026546 *  
StarRating2.5          1.072e+01  1.520e+01   0.705 0.481156    
StarRating3            5.116e+01  1.504e+01   3.402 0.000724 ***
StarRating3.5          5.505e+01  1.567e+01   3.514 0.000483 ***
StarRating4            1.100e+02  1.814e+01   6.062 2.71e-09 ***
StarRating4.5          8.211e+01  2.609e+01   3.147 0.001753 ** 
StarRating359          1.935e+02  6.388e+01   3.029 0.002587 ** 
IsWeekend1             3.178e+01  6.833e+00   4.651 4.26e-06 ***
Available1            -6.075e+01  6.825e+00  -8.901  < 2e-16 ***
MedianHouseHoldIncome  5.114e-05  2.420e-04   0.211 0.832719    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 60.9 on 484 degrees of freedom
Multiple R-squared:  0.526, Adjusted R-squared:  0.5113 
F-statistic: 35.81 on 15 and 484 DF,  p-value: < 2.2e-16
LS0tDQp0aXRsZTogIkluZGlhbmFwb2xpcyBEYXRhIFN1bW1hcnkiDQphdXRob3I6ICJTYW1lZXIgTWF0aHVyIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KICAgDQojIFJFQURJTkcgQU5EIERFU0NSSUJJTkcgREFUQQ0KDQojIyBSZWFkIHRoZSBEYXRhDQpgYGB7ciByZWFkLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQ0KIyByZWFkaW5nIGRhdGENCkluZGlhbmFwb2xpcy5kZiA8LSByZWFkLmNzdihwYXN0ZSgiSW5kaWFuYXBvbGlzRGF0YS5jc3YiKSkgICAgICANCiMgZGltZW5zaW9uIG9mIHRoZSBkYXRhIGZyYW1lDQpkaW0oSW5kaWFuYXBvbGlzLmRmKSAgICANCmBgYA0KDQpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgZWNobz1GQUxTRX0NCkluZGlhbmFwb2xpcy5kZiREYXkgPC0gYXMuZmFjdG9yKEluZGlhbmFwb2xpcy5kZiREYXkpDQpJbmRpYW5hcG9saXMuZGYkSXNXZWVrZW5kIDwtIGFzLmZhY3RvcihJbmRpYW5hcG9saXMuZGYkSXNXZWVrZW5kKQ0KSW5kaWFuYXBvbGlzLmRmJEF2YWlsYWJsZSA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJEF2YWlsYWJsZSkNCkluZGlhbmFwb2xpcy5kZiRTdGFyUmF0aW5nIDwtIGFzLmZhY3RvcihJbmRpYW5hcG9saXMuZGYkU3RhclJhdGluZykNCkluZGlhbmFwb2xpcy5kZiRGcmVlV2lmaSA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJEZyZWVXaWZpKQ0KSW5kaWFuYXBvbGlzLmRmJEZyZWVCcmVha2Zhc3QgPC0gYXMuZmFjdG9yKEluZGlhbmFwb2xpcy5kZiRGcmVlQnJlYWtmYXN0KQ0KSW5kaWFuYXBvbGlzLmRmJEhhc1N3aW1taW5nUG9vbCA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJEhhc1N3aW1taW5nUG9vbCkNCkluZGlhbmFwb2xpcy5kZiRJc01hcnJpb3R0IDwtIGFzLmZhY3RvcihJbmRpYW5hcG9saXMuZGYkSXNNYXJyaW90dCkNCkluZGlhbmFwb2xpcy5kZiRJc0hpbHRvbiA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJElzSGlsdG9uKQ0KSW5kaWFuYXBvbGlzLmRmJElzTWFycmlvdHRPckhpbHRvbiA8LSBhcy5mYWN0b3IoSW5kaWFuYXBvbGlzLmRmJElzTWFycmlvdHRPckhpbHRvbikNCmBgYA0KDQojIyBEYXRhIFN0cnVjdHVyZQ0KYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQphdHRhY2goSW5kaWFuYXBvbGlzLmRmKQ0KIyBkYXRhIHN0cnVjdHVyZSBvZiB0aGUgZGF0YWZyYW1lDQpzdHIoSW5kaWFuYXBvbGlzLmRmKQ0KYGBgDQoNCiMjIERlc2NyaXB0aXZlIHN0YXRpc3RpY3MNCmBgYHtyLCB3YXJuaW5nPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeShwc3ljaCkNCiMgZGVzY3JpcHRpdmUgc3RhdGlzdGljcyBvZiBob3RlbHMgZm9yIGNpdHkgSW5kaWFuYXBvbGlzDQpkZXNjcmliZShJbmRpYW5hcG9saXMuZGYpWywgYygxOjEwKV0NCmBgYA0KDQojIyBSZWdyZXNzaW9uIDANCmBgYHtyfQ0KTW9kZWwwIDwtIFJlbnRVU0QgfiBIb3RlbENhcGFjaXR5ICsgSXNNYXJyaW90dCArIElz
SGlsdG9uIA0KZml0MCA8LSBsbShNb2RlbDAsIGRhdGEgPSBJbmRpYW5hcG9saXMuZGYpDQpzdW1tYXJ5KGZpdDApDQpgYGANCg0KIyMgUmVncmVzc2lvbiAxDQpgYGB7cn0NCk1vZGVsMSA8LSBSZW50VVNEIH4gTWVkaWFuSG9tZVZhbHVlICsgTWVkaWFuSG91c2VIb2xkSW5jb21lICsgSXNNYXJyaW90dCArIElzSGlsdG9uDQpmaXQxIDwtIGxtKE1vZGVsMSwgZGF0YSA9IEluZGlhbmFwb2xpcy5kZikNCnN1bW1hcnkoZml0MSkNCmBgYA0KDQojIyBSZWdyZXNzaW9uIDINCmBgYHtyfQ0KTW9kZWwxIDwtIFJlbnRVU0QgfiBNZWRpYW5Ib21lVmFsdWUgKyBJc01hcnJpb3R0ICsgSXNIaWx0b24gKyBIb3RlbENhcGFjaXR5ICsgSGFzU3dpbW1pbmdQb29sICsgRnJlZUJyZWFrZmFzdCArIEZyZWVXaWZpICsgU3RhclJhdGluZyArIElzV2Vla2VuZCArIEF2YWlsYWJsZSArIE1lZGlhbkhvdXNlSG9sZEluY29tZQ0KZml0MSA8LSBsbShNb2RlbDEsIGRhdGEgPSBJbmRpYW5hcG9saXMuZGYpDQpzdW1tYXJ5KGZpdDEpDQpgYGANCg0KIyMgUmVncmVzc2lvbiAzDQpgYGB7cn0NCk1vZGVsMyA8LSBSZW50VVNEIH4gTWVkaWFuSG9tZVZhbHVlICsgSXNNYXJyaW90dCArIElzSGlsdG9uICsgSG90ZWxDYXBhY2l0eSArIEhhc1N3aW1taW5nUG9vbCArIEZyZWVCcmVha2Zhc3QgKyBTdGFyUmF0aW5nICsgSXNXZWVrZW5kICsgQXZhaWxhYmxlICsgTWVkaWFuSG91c2VIb2xkSW5jb21lDQpmaXQzIDwtIGxtKE1vZGVsMywgZGF0YSA9IEluZGlhbmFwb2xpcy5kZikNCnN1bW1hcnkoZml0MykNCmBgYA0KDQo=