1. reading data into R
# Load the listings data set from the working directory.
# stringsAsFactors = TRUE keeps text columns (room_type, neighborhood) as
# factors on R >= 4.0 as well, which is what the summaries, the scatterplot
# matrix and the regression further down rely on.
hotel <- read.csv("Prashant Project Data.csv", stringsAsFactors = TRUE)

# The spreadsheet-style viewer is only available in interactive sessions.
if (interactive()) {
  View(hotel)
}

# Number of rows and columns.
dim(hotel)
## [1] 854  10

-> The data set has 854 rows and 10 columns.

2) Descriptive statistics
library(psych)
# Per-column descriptive statistics from the psych package (n, mean, sd,
# median, trimmed mean, mad, min, max, range, skew, kurtosis, se).
# Columns flagged with `*` in the output (room_type, neighborhood) are
# categorical; their statistics are computed on numeric codes and are not
# meaningful as averages.
describe(hotel)
##                      vars   n        mean          sd   median     trimmed
## room_id                 1 854 11672573.46  5970259.24 13329838 12044585.16
## survey_id               2 854     1498.00        0.00     1498     1498.00
## host_id                 3 854 37877448.52 38428065.69 22920130 32332288.35
## room_type*              4 854        1.41        0.51        1        1.38
## neighborhood*           5 854        1.09        0.29        1        1.00
## reviews                 6 854       49.11       61.11       28       37.36
## overall_satisfaction    7 854        4.18        1.76        5        4.60
## accommodates            8 854        3.41        1.96        3        3.08
## bedrooms                9 854        1.35        0.84        1        1.25
## price                  10 854      126.62      202.38       95      104.00
##                              mad   min       max     range  skew kurtosis
## room_id               6822685.02 67870  19912932  19845062 -0.45    -1.14
## survey_id                   0.00  1498      1498         0   NaN      NaN
## host_id              29911610.67 62667 141036151 140973484  1.01    -0.11
## room_type*                  0.00     1         3         2  0.58    -1.17
## neighborhood*               0.00     1         3         2  2.95     7.28
## reviews                    35.58     0       602       602  2.64    12.03
## overall_satisfaction        0.00     0         5         5 -1.93     1.78
## accommodates                1.48     1        17        16  2.09     6.94
## bedrooms                    0.00     0        10        10  2.60    16.51
## price                      44.48    20      5000      4980 18.02   405.72
##                              se
## room_id               204298.07
## survey_id                  0.00
## host_id              1314981.34
## room_type*                 0.02
## neighborhood*              0.01
## reviews                    2.09
## overall_satisfaction       0.06
## accommodates               0.07
## bedrooms                   0.03
## price                      6.93
# Base-R overview: quartiles and mean for numeric columns, level counts for
# the factor columns (room_type, neighborhood).
summary(hotel)
##     room_id           survey_id       host_id         
##  Min.   :   67870   Min.   :1498   Min.   :    62667  
##  1st Qu.: 6413734   1st Qu.:1498   1st Qu.:  6453926  
##  Median :13329838   Median :1498   Median : 22920130  
##  Mean   :11672573   Mean   :1498   Mean   : 37877449  
##  3rd Qu.:16856088   3rd Qu.:1498   3rd Qu.: 58634762  
##  Max.   :19912932   Max.   :1498   Max.   :141036151  
##            room_type          neighborhood    reviews      
##  Entire home/apt:512   Asheville    :776   Min.   :  0.00  
##  Private room   :334   Formerly ETJ : 77   1st Qu.:  8.00  
##  Shared room    :  8   Richmond Hill:  1   Median : 28.00  
##                                            Mean   : 49.11  
##                                            3rd Qu.: 65.00  
##                                            Max.   :602.00  
##  overall_satisfaction  accommodates       bedrooms          price       
##  Min.   :0.00         Min.   : 1.000   Min.   : 0.000   Min.   :  20.0  
##  1st Qu.:4.50         1st Qu.: 2.000   1st Qu.: 1.000   1st Qu.:  70.0  
##  Median :5.00         Median : 3.000   Median : 1.000   Median :  95.0  
##  Mean   :4.18         Mean   : 3.412   Mean   : 1.352   Mean   : 126.6  
##  3rd Qu.:5.00         3rd Qu.: 4.000   3rd Qu.: 2.000   3rd Qu.: 139.0  
##  Max.   :5.00         Max.   :17.000   Max.   :10.000   Max.   :5000.0

3)one way contingency table for room type

# Number of listings in each room type.
table(hotel$room_type)
## 
## Entire home/apt    Private room     Shared room 
##             512             334               8
4) Two-way contingency table for room type and price
# Count listings for every (room type, price) combination, then append the
# row and column totals ("Sum") to the printed table.
hotel_table_two <- xtabs(formula = ~ room_type + price, data = hotel)
addmargins(A = hotel_table_two)
##                  price
## room_type          20  26  28  29  30  32  33  34  35  36  37  38  39  40
##   Entire home/apt   0   0   0   0   0   0   0   0   0   0   1   0   1   0
##   Private room      1   0   1   1   2   1   2   4   4   1   0   2   2  10
##   Shared room       1   4   0   0   0   0   0   0   0   0   0   0   0   0
##   Sum               2   4   1   1   2   1   2   4   4   1   1   2   3  10
##                  price
## room_type          42  44  45  46  47  48  49  50  51  52  53  54  55  57
##   Entire home/apt   1   0   0   1   1   1   2   1   1   1   0   1   4   1
##   Private room      3   7  11   3   2   2   4   8   1   2   1   3  15   4
##   Shared room       0   0   1   0   0   0   0   0   0   0   0   0   0   0
##   Sum               4   7  12   4   3   3   6   9   2   3   1   4  19   5
##                  price
## room_type          58  59  60  61  62  63  64  65  66  67  68  69  70  71
##   Entire home/apt   0   1   2   0   0   0   0   4   0   1   5   4   3   2
##   Private room      7   1  12   2   3   4   2  23   1   2   4   5  14   0
##   Shared room       0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   Sum               7   2  14   2   3   4   2  27   1   4   9   9  17   2
##                  price
## room_type          72  73  74  75  76  77  78  79  80  81  82  83  84  85
##   Entire home/apt   2   1   3  11   1   2   2   9  12   2   0   1   0  14
##   Private room      1   1   0  17   0   2   2  11  13   0   1   1   1  24
##   Shared room       0   0   0   0   1   0   0   0   0   0   0   0   0   0
##   Sum               3   2   3  28   2   4   4  20  25   2   1   2   1  38
##                  price
## room_type          86  87  88  89  90  91  92  93  94  95  97  98  99 100
##   Entire home/apt   2   1   5  16   8   1   1   2   3  17   3   2  21  20
##   Private room      0   1   1   4   8   0   2   0   0   9   0   1   6  14
##   Shared room       0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   Sum               2   2   6  20  16   1   3   2   3  26   3   3  27  34
##                  price
## room_type         102 104 105 106 107 108 109 110 111 112 114 115 116 118
##   Entire home/apt   0   2   6   1   3   1   5  13   3   2   4  13   1   1
##   Private room      1   0   3   1   0   0   0   0   0   0   0   0   0   0
##   Shared room       0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   Sum               1   2   9   2   3   1   5  13   3   2   4  13   1   1
##                  price
## room_type         119 120 123 125 127 128 129 130 134 135 139 140 142 143
##   Entire home/apt   7   9   1  21   0   2   1  10   3  10   4   9   1   1
##   Private room      0   1   0   1   1   0   2   1   0   2   0   1   0   0
##   Shared room       0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   Sum               7  10   1  22   1   2   3  11   3  12   4  10   1   1
##                  price
## room_type         144 145 146 147 149 150 154 155 158 159 160 164 165 167
##   Entire home/apt   1   0   3   1   3  20   1   1   2   2   4   1   2   1
##   Private room      1   2   0   1   1   2   0   0   0   2   1   0   1   0
##   Shared room       0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   Sum               2   2   3   2   4  22   1   1   2   4   5   1   3   1
##                  price
## room_type         169 170 175 177 179 180 183 185 189 190 192 195 198 199
##   Entire home/apt   1   0  17   1   1   4   1   3   3   5   1   3   1   5
##   Private room      0   1   0   0   0   0   0   0   0   0   0   0   0   1
##   Shared room       0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   Sum               1   1  17   1   1   4   1   3   3   5   1   3   1   6
##                  price
## room_type         200 205 209 210 219 224 225 229 240 245 249 250 259 262
##   Entire home/apt  11   1   0   1   1   1   8   1   1   4   1  17   0   2
##   Private room      1   0   3   0   0   0   1   0   0   0   1   0   2   0
##   Shared room       0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   Sum              12   1   3   1   1   1   9   1   1   4   2  17   2   2
##                  price
## room_type         265 275 288 289 290 295 300 315 325 329 330 350 375 395
##   Entire home/apt   1   5   1   0   1   2   3   1   3   0   1   2   1   1
##   Private room      0   0   0   1   0   0   0   0   0   1   0   0   0   0
##   Shared room       0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   Sum               1   5   1   1   1   2   3   1   3   1   1   2   1   1
##                  price
## room_type         400 425 450 465 475 485 540 600 930 1250 2222 5000 Sum
##   Entire home/apt   4   2   3   1   1   1   1   1   1    1    1    1 512
##   Private room      0   0   0   0   0   0   0   0   0    0    0    0 334
##   Shared room       0   0   0   0   0   0   0   0   0    0    0    0   8
##   Sum               4   2   3   1   1   1   1   1   1    1    1    1 854
5) Box Plot for accommodates
# Horizontal box plot of the `accommodates` column.
# The title and axis label previously misspelled the column name
# ("accomodates"); they now match the variable.
boxplot(
  hotel$accommodates,
  horizontal = TRUE,
  main = "Box Plot for accommodates",
  xlab = "accommodates",
  col = "blue"
)

6) Box Plot for overall satisfaction
# Horizontal box plot of the overall satisfaction score.
# The axis label previously read "overall satisafaction" (typo).
boxplot(
  hotel$overall_satisfaction,
  horizontal = TRUE,
  main = "Box Plot for overall satisfaction",
  xlab = "overall satisfaction",
  col = "green"
)

7)Bar Graph for overall satisfaction of the customers

# Number of listings at each distinct overall_satisfaction score.
table(hotel$overall_satisfaction)
## 
##   0   4 4.5   5 
## 127   8 115 604
# Bar chart of how many listings fall at each overall satisfaction score.
overall_satisf <- table(hotel$overall_satisfaction)
barplot(
  overall_satisf,
  width = 0.5,
  space = 1,
  main = "Overall satisfaction of customers",
  # Explicit "\n" gives a two-line label without carrying source-code
  # indentation into the plot.
  xlab = "satisfaction level\n(0=Lowest---5=Highest)",
  # One colour per bar; the table has four distinct scores.
  col = c("yellowgreen", "green", "darkolivegreen", "darkgreen"),
  ylim = c(0, 860),
  xlim = c(0, 10),
  # Take the bar labels from the table itself so they cannot drift from the
  # data.
  names.arg = names(overall_satisf)
)

8) Scatterplot Matrix for different variables
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Pairwise scatterplots of the listing attributes (car::scatterplotMatrix).
# The previous title ("... Age, Business Travel Rate and Department")
# described variables that are not in this plot; it now names the plotted
# columns.
scatterplotMatrix(
  ~ room_type + reviews + overall_satisfaction + accommodates + bedrooms + price,
  data = hotel,
  main = "Scatterplot matrix of room type, reviews, overall satisfaction, accommodates, bedrooms and price"
)

9) Pearson Chi-square Test for room type and satisfaction
# Cross-tabulate satisfaction score against room type and run Pearson's
# chi-squared test of independence on the resulting table.
# NOTE(review): R reports "Chi-squared approximation may be incorrect" for
# this call, which usually indicates small expected cell counts (there are
# only 8 shared rooms). Consider chisq.test(chi1, simulate.p.value = TRUE)
# or merging sparse categories before relying on the p-value.
chi1 <- xtabs (~ overall_satisfaction + room_type, data=hotel)
chisq.test(chi1)
## Warning in chisq.test(chi1): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  chi1
## X-squared = 31.374, df = 6, p-value = 2.151e-05
10) Pearson Chi-square Test for neighborhood and satisfaction level
# Cross-tabulate satisfaction score against neighborhood and run Pearson's
# chi-squared test of independence on the resulting table.
# NOTE(review): R reports "Chi-squared approximation may be incorrect" for
# this call, which usually indicates small expected cell counts (Richmond
# Hill has a single listing). Consider
# chisq.test(chi2, simulate.p.value = TRUE) or merging sparse categories
# before relying on the p-value.
chi2 <- xtabs (~ overall_satisfaction + neighborhood, data=hotel)
chisq.test(chi2)
## Warning in chisq.test(chi2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  chi2
## X-squared = 9.2236, df = 6, p-value = 0.1614
11) Performing T-Test for finding the dependency of reviews on overall satisfaction level
# Welch two-sample t-test between the satisfaction scores and the review
# counts.
# NOTE(review): this tests whether the two columns have the same mean
# (4.18 vs 49.11). They are on different scales, so a significant result
# says nothing about whether satisfaction depends on reviews. For
# association, see the correlation matrix (r = 0.33) or use
# cor.test(hotel$overall_satisfaction, hotel$reviews).
t.test(hotel$overall_satisfaction, hotel$reviews)
## 
##  Welch Two Sample t-test
## 
## data:  hotel$overall_satisfaction and hotel$reviews
## t = -21.476, df = 854.41, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -49.03783 -40.82517
## sample estimates:
## mean of x mean of y 
##  4.179742 49.111241
12) Performing T-Test for finding the dependency of number of accommodates on overall satisfaction level
# Welch two-sample t-test between the satisfaction scores and the
# accommodates column.
# NOTE(review): this compares the means of two different variables
# (4.18 vs 3.41); it does not test whether satisfaction depends on
# accommodates. For association, use
# cor.test(hotel$overall_satisfaction, hotel$accommodates).
t.test(hotel$overall_satisfaction, hotel$accommodates)
## 
##  Welch Two Sample t-test
## 
## data:  hotel$overall_satisfaction and hotel$accommodates
## t = 8.5144, df = 1685.9, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.5907490 0.9443798
## sample estimates:
## mean of x mean of y 
##  4.179742  3.412178

13)Performing T-Test for finding the dependency of number of bedrooms on overall satisfaction level

# Welch two-sample t-test between the satisfaction scores and the bedrooms
# column.
# NOTE(review): this compares the means of two different variables
# (4.18 vs 1.35); it does not test whether satisfaction depends on the
# number of bedrooms. For association, use
# cor.test(hotel$overall_satisfaction, hotel$bedrooms).
t.test(hotel$overall_satisfaction, hotel$bedrooms)
## 
##  Welch Two Sample t-test
## 
## data:  hotel$overall_satisfaction and hotel$bedrooms
## t = 42.393, df = 1223.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  2.696440 2.958127
## sample estimates:
## mean of x mean of y 
##  4.179742  1.352459
14) Performing T-Test for finding the dependency of price on overall satisfaction level
# Welch two-sample t-test between the satisfaction scores and the price
# column.
# NOTE(review): this compares the means of two different variables
# (4.18 vs 126.62); it does not test whether satisfaction depends on price.
# For association, use cor.test(hotel$overall_satisfaction, hotel$price).
t.test(hotel$overall_satisfaction, hotel$price)
## 
##  Welch Two Sample t-test
## 
## data:  hotel$overall_satisfaction and hotel$price
## t = -17.679, df = 853.13, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -136.0308 -108.8439
## sample estimates:
##  mean of x  mean of y 
##   4.179742 126.617096
15) Correlation Analysis
# Pearson correlation matrix of the numeric listing attributes.
# Columns are selected by name instead of by position (6:10) so the result
# does not silently change if the column order of the CSV changes.
cor(hotel[, c("reviews", "overall_satisfaction", "accommodates",
              "bedrooms", "price")])
##                         reviews overall_satisfaction accommodates
## reviews               1.0000000           0.33413467  -0.11658640
## overall_satisfaction  0.3341347           1.00000000  -0.09202409
## accommodates         -0.1165864          -0.09202409   1.00000000
## bedrooms             -0.1143018          -0.09009348   0.79817570
## price                -0.0931582          -0.14207140   0.51468095
##                         bedrooms      price
## reviews              -0.11430182 -0.0931582
## overall_satisfaction -0.09009348 -0.1420714
## accommodates          0.79817570  0.5146809
## bedrooms              1.00000000  0.5437369
## price                 0.54373690  1.0000000
16) Generating Corrgram
library(corrgram)
# Correlogram of the numeric listing attributes: shaded cells below the
# diagonal, pie glyphs above it, variable names on the diagonal.
# Columns are selected by name instead of by position (6:10) so the plot
# does not silently change if the column order of the CSV changes.
corrgram(
  hotel[, c("reviews", "overall_satisfaction", "accommodates",
            "bedrooms", "price")],
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram for different variables"
)

17) Converting variables into factors # Converting room_type into a factor variable
# Ensure room_type is a factor so lm() treats it as a categorical predictor.
# The earlier recoding through `hotel$Res` was dropped: the data set has no
# `Res` column, so `hotel$Res` is NULL, the comparisons selected no rows and
# the three assignments never changed anything.
hotel$room_type <- factor(hotel$room_type)
18) Regression Analysis
# Linear model of overall satisfaction on room type, review count, capacity,
# bedrooms and price, followed by the coefficient table and fit statistics.
reg1 <- lm(
  formula = overall_satisfaction ~ room_type + reviews + accommodates +
    bedrooms + price,
  data = hotel
)

summary(reg1)
## 
## Call:
## lm(formula = overall_satisfaction ~ room_type + reviews + accommodates + 
##     bedrooms + price, data = hotel)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7108 -0.1039  0.6725  1.0245  3.1436 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.0854118  0.1635207  24.984  < 2e-16 ***
## room_typePrivate room -0.3401051  0.1295452  -2.625  0.00881 ** 
## room_typeShared room  -0.2331303  0.5900378  -0.395  0.69286    
## reviews                0.0092462  0.0009294   9.948  < 2e-16 ***
## accommodates          -0.0537422  0.0521089  -1.031  0.30267    
## bedrooms               0.0645657  0.1164050   0.555  0.57927    
## price                 -0.0010149  0.0003363  -3.018  0.00262 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.645 on 847 degrees of freedom
## Multiple R-squared:  0.1313, Adjusted R-squared:  0.1251 
## F-statistic: 21.34 on 6 and 847 DF,  p-value: < 2.2e-16
19) Summary

-> The analysis of the given data was performed, and the dependencies of various parameters on the overall satisfaction of customers of Airbnb listings in the Asheville area were examined. After carrying out the analysis, it was found that room type significantly affects overall satisfaction (private rooms score lower than entire homes/apartments, p = 0.009), and that the number of reviews is the strongest predictor (p < 2e-16). Price is also statistically significant (p = 0.003), although its coefficient is very small (about -0.001 per unit of price), while bedrooms and accommodates affect overall satisfaction negligibly and are not significant. The multiple R-squared value is 0.1313, meaning the model explains only about 13% of the variance in overall satisfaction; parameters that were not considered in this analysis are therefore likely to be needed to explain satisfaction more fully.