# Load the project data set (the CSV is expected in the working directory).
# stringsAsFactors = TRUE keeps text columns such as room_type and
# neighborhood as factors, which is what the recorded describe()/summary()
# output below reflects; since R 4.0.0 the default is FALSE.
hotel <- read.csv("Prashant Project Data.csv", stringsAsFactors = TRUE)
View(hotel) # open the data in the spreadsheet-style viewer
dim(hotel)  # number of rows and number of columns
## [1] 854 10
-> The number of rows is 854 and the number of columns is 10.
# Per-column descriptive statistics (n, mean, sd, median, skew, kurtosis,
# se, ...) computed with the psych package.
library(psych)
psych::describe(x = hotel)
## vars n mean sd median trimmed
## room_id 1 854 11672573.46 5970259.24 13329838 12044585.16
## survey_id 2 854 1498.00 0.00 1498 1498.00
## host_id 3 854 37877448.52 38428065.69 22920130 32332288.35
## room_type* 4 854 1.41 0.51 1 1.38
## neighborhood* 5 854 1.09 0.29 1 1.00
## reviews 6 854 49.11 61.11 28 37.36
## overall_satisfaction 7 854 4.18 1.76 5 4.60
## accommodates 8 854 3.41 1.96 3 3.08
## bedrooms 9 854 1.35 0.84 1 1.25
## price 10 854 126.62 202.38 95 104.00
## mad min max range skew kurtosis
## room_id 6822685.02 67870 19912932 19845062 -0.45 -1.14
## survey_id 0.00 1498 1498 0 NaN NaN
## host_id 29911610.67 62667 141036151 140973484 1.01 -0.11
## room_type* 0.00 1 3 2 0.58 -1.17
## neighborhood* 0.00 1 3 2 2.95 7.28
## reviews 35.58 0 602 602 2.64 12.03
## overall_satisfaction 0.00 0 5 5 -1.93 1.78
## accommodates 1.48 1 17 16 2.09 6.94
## bedrooms 0.00 0 10 10 2.60 16.51
## price 44.48 20 5000 4980 18.02 405.72
## se
## room_id 204298.07
## survey_id 0.00
## host_id 1314981.34
## room_type* 0.02
## neighborhood* 0.01
## reviews 2.09
## overall_satisfaction 0.06
## accommodates 0.07
## bedrooms 0.03
## price 6.93
# Base-R summary of every column of the data frame.
summary(object = hotel)
## room_id survey_id host_id
## Min. : 67870 Min. :1498 Min. : 62667
## 1st Qu.: 6413734 1st Qu.:1498 1st Qu.: 6453926
## Median :13329838 Median :1498 Median : 22920130
## Mean :11672573 Mean :1498 Mean : 37877449
## 3rd Qu.:16856088 3rd Qu.:1498 3rd Qu.: 58634762
## Max. :19912932 Max. :1498 Max. :141036151
## room_type neighborhood reviews
## Entire home/apt:512 Asheville :776 Min. : 0.00
## Private room :334 Formerly ETJ : 77 1st Qu.: 8.00
## Shared room : 8 Richmond Hill: 1 Median : 28.00
## Mean : 49.11
## 3rd Qu.: 65.00
## Max. :602.00
## overall_satisfaction accommodates bedrooms price
## Min. :0.00 Min. : 1.000 Min. : 0.000 Min. : 20.0
## 1st Qu.:4.50 1st Qu.: 2.000 1st Qu.: 1.000 1st Qu.: 70.0
## Median :5.00 Median : 3.000 Median : 1.000 Median : 95.0
## Mean :4.18 Mean : 3.412 Mean : 1.352 Mean : 126.6
## 3rd Qu.:5.00 3rd Qu.: 4.000 3rd Qu.: 2.000 3rd Qu.: 139.0
## Max. :5.00 Max. :17.000 Max. :10.000 Max. :5000.0
3) One-way contingency table for room type
# Number of listings for each room type.
table(hotel[["room_type"]])
##
## Entire home/apt Private room Shared room
## 512 334 8
# Two-way contingency table of room type by price, printed together with
# its row and column totals.
hotel_table_two <- xtabs(formula = ~ room_type + price, data = hotel)
addmargins(A = hotel_table_two)
## price
## room_type 20 26 28 29 30 32 33 34 35 36 37 38 39 40
## Entire home/apt 0 0 0 0 0 0 0 0 0 0 1 0 1 0
## Private room 1 0 1 1 2 1 2 4 4 1 0 2 2 10
## Shared room 1 4 0 0 0 0 0 0 0 0 0 0 0 0
## Sum 2 4 1 1 2 1 2 4 4 1 1 2 3 10
## price
## room_type 42 44 45 46 47 48 49 50 51 52 53 54 55 57
## Entire home/apt 1 0 0 1 1 1 2 1 1 1 0 1 4 1
## Private room 3 7 11 3 2 2 4 8 1 2 1 3 15 4
## Shared room 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## Sum 4 7 12 4 3 3 6 9 2 3 1 4 19 5
## price
## room_type 58 59 60 61 62 63 64 65 66 67 68 69 70 71
## Entire home/apt 0 1 2 0 0 0 0 4 0 1 5 4 3 2
## Private room 7 1 12 2 3 4 2 23 1 2 4 5 14 0
## Shared room 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## Sum 7 2 14 2 3 4 2 27 1 4 9 9 17 2
## price
## room_type 72 73 74 75 76 77 78 79 80 81 82 83 84 85
## Entire home/apt 2 1 3 11 1 2 2 9 12 2 0 1 0 14
## Private room 1 1 0 17 0 2 2 11 13 0 1 1 1 24
## Shared room 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## Sum 3 2 3 28 2 4 4 20 25 2 1 2 1 38
## price
## room_type 86 87 88 89 90 91 92 93 94 95 97 98 99 100
## Entire home/apt 2 1 5 16 8 1 1 2 3 17 3 2 21 20
## Private room 0 1 1 4 8 0 2 0 0 9 0 1 6 14
## Shared room 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Sum 2 2 6 20 16 1 3 2 3 26 3 3 27 34
## price
## room_type 102 104 105 106 107 108 109 110 111 112 114 115 116 118
## Entire home/apt 0 2 6 1 3 1 5 13 3 2 4 13 1 1
## Private room 1 0 3 1 0 0 0 0 0 0 0 0 0 0
## Shared room 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Sum 1 2 9 2 3 1 5 13 3 2 4 13 1 1
## price
## room_type 119 120 123 125 127 128 129 130 134 135 139 140 142 143
## Entire home/apt 7 9 1 21 0 2 1 10 3 10 4 9 1 1
## Private room 0 1 0 1 1 0 2 1 0 2 0 1 0 0
## Shared room 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Sum 7 10 1 22 1 2 3 11 3 12 4 10 1 1
## price
## room_type 144 145 146 147 149 150 154 155 158 159 160 164 165 167
## Entire home/apt 1 0 3 1 3 20 1 1 2 2 4 1 2 1
## Private room 1 2 0 1 1 2 0 0 0 2 1 0 1 0
## Shared room 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Sum 2 2 3 2 4 22 1 1 2 4 5 1 3 1
## price
## room_type 169 170 175 177 179 180 183 185 189 190 192 195 198 199
## Entire home/apt 1 0 17 1 1 4 1 3 3 5 1 3 1 5
## Private room 0 1 0 0 0 0 0 0 0 0 0 0 0 1
## Shared room 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Sum 1 1 17 1 1 4 1 3 3 5 1 3 1 6
## price
## room_type 200 205 209 210 219 224 225 229 240 245 249 250 259 262
## Entire home/apt 11 1 0 1 1 1 8 1 1 4 1 17 0 2
## Private room 1 0 3 0 0 0 1 0 0 0 1 0 2 0
## Shared room 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Sum 12 1 3 1 1 1 9 1 1 4 2 17 2 2
## price
## room_type 265 275 288 289 290 295 300 315 325 329 330 350 375 395
## Entire home/apt 1 5 1 0 1 2 3 1 3 0 1 2 1 1
## Private room 0 0 0 1 0 0 0 0 0 1 0 0 0 0
## Shared room 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Sum 1 5 1 1 1 2 3 1 3 1 1 2 1 1
## price
## room_type 400 425 450 465 475 485 540 600 930 1250 2222 5000 Sum
## Entire home/apt 4 2 3 1 1 1 1 1 1 1 1 1 512
## Private room 0 0 0 0 0 0 0 0 0 0 0 0 334
## Shared room 0 0 0 0 0 0 0 0 0 0 0 0 8
## Sum 4 2 3 1 1 1 1 1 1 1 1 1 854
# Horizontal box plots of the number of guests a listing accommodates and
# of the overall satisfaction score.
# The misspelled plot labels ("accomodates", "satisafaction") are corrected.
boxplot(
  hotel$accommodates,
  horizontal = TRUE,
  main = "Box Plot for accommodates",
  xlab = "accommodates",
  col = "blue"
)
boxplot(
  hotel$overall_satisfaction,
  horizontal = TRUE,
  main = "Box Plot for overall satisfaction",
  xlab = "overall satisfaction",
  col = "green"
)
7) Bar graph of the overall satisfaction of the customers
# Number of listings at each overall satisfaction score.
table(hotel[["overall_satisfaction"]])
##
## 0 4 4.5 5
## 127 8 115 604
# Bar chart of how many listings received each overall satisfaction score.
overall_satisf <- table(hotel$overall_satisfaction)
barplot(
  overall_satisf,
  names.arg = c("0", "4", "4.5", "5"),
  col = c("yellowgreen", "green", "darkolivegreen", "darkgreen", "red"),
  main = "Overall satisfaction of customers",
  xlab = "satisfaction level\n(0=Lowest---5=Highest)",
  width = 0.5,
  space = 1,
  xlim = c(0, 10),
  ylim = c(0, 860)
)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Scatterplot matrix of overall satisfaction and its candidate drivers.
# The title previously named unrelated variables (age, business travel rate,
# department); it now names the columns that are actually plotted.
scatterplotMatrix(
  ~ room_type + reviews + overall_satisfaction + accommodates + bedrooms +
    price,
  data = hotel,
  main = paste(
    "Variation of customer satisfaction with room type, reviews,",
    "accommodates, bedrooms and price"
  )
)
# Pearson chi-squared test of independence between overall satisfaction
# and room type, run on their cross-tabulation.
# R warns that the chi-squared approximation may be incorrect for this table
# (see the recorded output below).
chi1 <- xtabs(formula = ~ overall_satisfaction + room_type, data = hotel)
chisq.test(chi1)
## Warning in chisq.test(chi1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: chi1
## X-squared = 31.374, df = 6, p-value = 2.151e-05
# Pearson chi-squared test of independence between overall satisfaction
# and neighborhood, run on their cross-tabulation.
# R warns that the chi-squared approximation may be incorrect for this table
# (see the recorded output below).
chi2 <- xtabs(formula = ~ overall_satisfaction + neighborhood, data = hotel)
chisq.test(chi2)
## Warning in chisq.test(chi2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: chi2
## X-squared = 9.2236, df = 6, p-value = 0.1614
# Welch two-sample t-test comparing the mean of overall_satisfaction with the
# mean of reviews.
# NOTE(review): this compares the means of two different variables; if the
# intent is to test whether they are associated, cor.test() may be the more
# appropriate tool -- confirm with the author.
t.test(
  x = hotel$overall_satisfaction,
  y = hotel$reviews
)
##
## Welch Two Sample t-test
##
## data: hotel$overall_satisfaction and hotel$reviews
## t = -21.476, df = 854.41, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -49.03783 -40.82517
## sample estimates:
## mean of x mean of y
## 4.179742 49.111241
# Welch two-sample t-test comparing the mean of overall_satisfaction with the
# mean of accommodates.
# NOTE(review): this compares the means of two different variables; if the
# intent is to test whether they are associated, cor.test() may be the more
# appropriate tool -- confirm with the author.
t.test(
  x = hotel$overall_satisfaction,
  y = hotel$accommodates
)
##
## Welch Two Sample t-test
##
## data: hotel$overall_satisfaction and hotel$accommodates
## t = 8.5144, df = 1685.9, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.5907490 0.9443798
## sample estimates:
## mean of x mean of y
## 4.179742 3.412178
13) Performing a t-test to find the dependency between the number of bedrooms and the overall satisfaction level
# Welch two-sample t-test comparing the mean of overall_satisfaction with the
# mean of bedrooms.
# NOTE(review): this compares the means of two different variables; if the
# intent is to test whether they are associated, cor.test() may be the more
# appropriate tool -- confirm with the author.
t.test(
  x = hotel$overall_satisfaction,
  y = hotel$bedrooms
)
##
## Welch Two Sample t-test
##
## data: hotel$overall_satisfaction and hotel$bedrooms
## t = 42.393, df = 1223.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.696440 2.958127
## sample estimates:
## mean of x mean of y
## 4.179742 1.352459
# Welch two-sample t-test comparing the mean of overall_satisfaction with the
# mean of price.
# NOTE(review): this compares the means of two different variables; if the
# intent is to test whether they are associated, cor.test() may be the more
# appropriate tool -- confirm with the author.
t.test(
  x = hotel$overall_satisfaction,
  y = hotel$price
)
##
## Welch Two Sample t-test
##
## data: hotel$overall_satisfaction and hotel$price
## t = -17.679, df = 853.13, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -136.0308 -108.8439
## sample estimates:
## mean of x mean of y
## 4.179742 126.617096
# Pairwise correlations of the numeric listing attributes.
# The columns are selected by name instead of by position (6:10), so the
# result does not silently change if the column order of the CSV changes.
cor(
  hotel[, c(
    "reviews", "overall_satisfaction", "accommodates", "bedrooms", "price"
  )]
)
## reviews overall_satisfaction accommodates
## reviews 1.0000000 0.33413467 -0.11658640
## overall_satisfaction 0.3341347 1.00000000 -0.09202409
## accommodates -0.1165864 -0.09202409 1.00000000
## bedrooms -0.1143018 -0.09009348 0.79817570
## price -0.0931582 -0.14207140 0.51468095
## bedrooms price
## reviews -0.11430182 -0.0931582
## overall_satisfaction -0.09009348 -0.1420714
## accommodates 0.79817570 0.5146809
## bedrooms 1.00000000 0.5437369
## price 0.54373690 1.0000000
# Correlogram of the numeric listing attributes: shaded cells below the
# diagonal, pie glyphs above it, variable names on the diagonal.
# The columns are selected by name instead of by position (6:10), so the
# plot does not silently change if the column order of the CSV changes.
library(corrgram)
corrgram(
  hotel[, c(
    "reviews", "overall_satisfaction", "accommodates", "bedrooms", "price"
  )],
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram for different variables"
)
# Linear regression of overall satisfaction on room type, number of reviews,
# guest capacity, number of bedrooms and price.
#
# Three earlier assignments that recoded room_type from `hotel$Res` were
# removed: the data set has no `Res` column, so `hotel$Res == 0` evaluated to
# an empty logical vector and the assignments changed nothing.
#
# room_type is made a factor so lm() expands it into dummy variables; in the
# recorded coefficient table "Entire home/apt" is the baseline level.
hotel$room_type <- factor(hotel$room_type)
reg1 <- lm(
  overall_satisfaction ~ room_type + reviews + accommodates + bedrooms + price,
  data = hotel
)
summary(reg1)
##
## Call:
## lm(formula = overall_satisfaction ~ room_type + reviews + accommodates +
## bedrooms + price, data = hotel)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7108 -0.1039 0.6725 1.0245 3.1436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.0854118 0.1635207 24.984 < 2e-16 ***
## room_typePrivate room -0.3401051 0.1295452 -2.625 0.00881 **
## room_typeShared room -0.2331303 0.5900378 -0.395 0.69286
## reviews 0.0092462 0.0009294 9.948 < 2e-16 ***
## accommodates -0.0537422 0.0521089 -1.031 0.30267
## bedrooms 0.0645657 0.1164050 0.555 0.57927
## price -0.0010149 0.0003363 -3.018 0.00262 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.645 on 847 degrees of freedom
## Multiple R-squared: 0.1313, Adjusted R-squared: 0.1251
## F-statistic: 21.34 on 6 and 847 DF, p-value: < 2.2e-16
-> The given data were analysed to examine which parameters affect the overall satisfaction of customers across the Airbnb listings in the Asheville area. The regression shows that room type (private room versus entire home/apt) and the number of reviews significantly affect the overall satisfaction of customers. Price has a statistically significant but very small negative effect, while bedrooms and accommodates do not affect overall satisfaction significantly. The multiple R-squared value is 0.1313, which means the model explains only about 13% of the variation in overall satisfaction; other parameters that were not considered in this analysis are needed to explain it better.