Bivariate Data Summary and Visualization -- Hotels

Sameer Mathur

BIVARIATE RELATIONSHIP

Read the data

# Load the hotel data set; the relative path is resolved against the current
# working directory.
AllCities.df <- read.csv("AllCitiesData.csv")
# NOTE(review): attach() is retained only because other snippets in this
# document refer to columns such as RoomRent by bare name. Prefer with() or a
# data= argument in new code.
attach(AllCities.df)
# Number of rows and columns in the data frame
dim(AllCities.df)
[1] 6467   18

DISCRETE VARIABLES - SUMMARY AND VISUALIZATION

Average Room Rent in Tourist Destination versus Non-tourist Destination

# Method 1: using aggregate()
# with() evaluates the column names inside AllCities.df, so this call does not
# depend on the data frame having been attach()ed.
with(AllCities.df,
     aggregate(RoomRent,
               by = list(TouristDestination = IsTouristDestination),
               FUN = mean))
  TouristDestination        x
1                 No 4507.500
2                Yes 6790.124
# Method 2: using by()
# Columns are looked up inside AllCities.df via with() rather than through
# the attach()ed search path.
with(AllCities.df, by(RoomRent, IsTouristDestination, mean))
IsTouristDestination: No
[1] 4507.5
-------------------------------------------------------- 
IsTouristDestination: Yes
[1] 6790.124

Average Hotel Room Rent in Tourist Destination (Yes:No) offering Free Breakfast (Yes:No)

# Mean room rent for every combination of tourist-destination status and
# free-breakfast availability. with() removes the dependence on attach().
with(AllCities.df,
     aggregate(RoomRent,
               by = list(TouristDestination = IsTouristDestination,
                         FreeBreakfast = FreeBreakfast),
               FUN = mean))
  TouristDestination FreeBreakfast        x
1                 No            No 4672.975
2                Yes            No 6355.771
3                 No           Yes 4424.362
4                Yes           Yes 7096.756

Average Hotel Room Rent in Tourist Destination (Yes:No) offering Free Wifi (Yes:No)

# Mean room rent for every combination of tourist-destination status and
# free-wifi availability. with() removes the dependence on attach().
with(AllCities.df,
     aggregate(RoomRent,
               by = list(TouristDestination = IsTouristDestination,
                         FreeWifi = FreeWifi),
               FUN = mean))
  TouristDestination FreeWifi        x
1                 No       No 4151.906
2                Yes       No 5894.469
3                 No      Yes 4529.812
4                Yes      Yes 6885.568

Average Hotel Room Rent in Tourist Destination (Yes:No) offering Swimming Pool (Yes:No)

# Mean room rent for every combination of tourist-destination status and
# swimming-pool availability. with() removes the dependence on attach().
with(AllCities.df,
     aggregate(RoomRent,
               by = list(TouristDestination = IsTouristDestination,
                         SwimmingPool = HasSwimmingPool),
               FUN = mean))
  TouristDestination SwimmingPool         x
1                 No           No  3328.812
2                Yes           No  3966.258
3                 No          Yes  6691.175
4                Yes          Yes 10563.975

Mean plot of IsTouristDestination

# Draw the group means of RoomRent for each level of IsTouristDestination.
library(gplots)
plotmeans(
  formula = RoomRent ~ IsTouristDestination,
  data = AllCities.df,
  frame = FALSE
)

plot of chunk unnamed-chunk-6

Mean plot of FreeBreakfast

# Draw the group means of RoomRent for each level of FreeBreakfast.
library(gplots)
plotmeans(
  formula = RoomRent ~ FreeBreakfast,
  data = AllCities.df,
  frame = FALSE
)

plot of chunk unnamed-chunk-7

Mean plot of FreeWifi

# Draw the group means of RoomRent for each level of FreeWifi.
library(gplots)
plotmeans(
  formula = RoomRent ~ FreeWifi,
  data = AllCities.df,
  frame = FALSE
)

plot of chunk unnamed-chunk-8

Mean plot of Swimming Pool

# Draw the group means of RoomRent for each level of HasSwimmingPool.
library(gplots)
plotmeans(
  formula = RoomRent ~ HasSwimmingPool,
  data = AllCities.df,
  frame = FALSE
)

plot of chunk unnamed-chunk-9

Comparison of Room Rent corresponding to Tourist Destination

# Horizontal box plots of room rent, one box per IsTouristDestination level.
boxplot(
  RoomRent ~ IsTouristDestination,
  data = AllCities.df,
  main = "Comparison of Room Rent corresponding to Tourist Destination",
  xlab = "Room Rent (INR)",
  ylab = "Tourist Destination",
  horizontal = TRUE,
  las = 1
)

plot of chunk unnamed-chunk-10

Comparison of Room Rent corresponding to FreeWifi (Yes:No)

# Horizontal box plots of room rent, one box per FreeWifi level.
boxplot(
  RoomRent ~ FreeWifi,
  data = AllCities.df,
  main = "Comparison of Room Rent corresponding to Free Wifi",
  xlab = "Room Rent (INR)",
  ylab = "Free Wifi",
  horizontal = TRUE,
  las = 1
)

plot of chunk unnamed-chunk-11

Comparison of Room Rent corresponding to FreeBreakfast (Yes:No)

# Horizontal box plots of room rent, one box per FreeBreakfast level.
boxplot(
  RoomRent ~ FreeBreakfast,
  data = AllCities.df,
  main = "Comparison of Room Rent corresponding to Free Breakfast",
  xlab = "Room Rent (INR)",
  ylab = "Free Breakfast",
  horizontal = TRUE,
  las = 1
)

plot of chunk unnamed-chunk-12

Comparison of Room Rent corresponding to HasSwimmingPool (Yes:No)

# Horizontal box plots of room rent, one box per HasSwimmingPool level.
boxplot(
  RoomRent ~ HasSwimmingPool,
  data = AllCities.df,
  main = "Comparison of Room Rent corresponding to Swimming Pool",
  xlab = "Room Rent (INR)",
  ylab = "Swimming Pool",
  horizontal = TRUE,
  las = 1
)

plot of chunk unnamed-chunk-13

CONTINUOUS VARIABLES - SUMMARY AND VISUALIZATION

Scatterplot of Room Rent versus Hotel Capacity for Tourist and Non-Tourist Destination

library(car)
# HotelCapacity (y-axis) against RoomRent (x-axis), with points grouped by
# IsTouristDestination.
scatterplot(
  HotelCapacity ~ RoomRent | IsTouristDestination,
  data = AllCities.df,
  xlab = "Room Rent (INR)",
  ylab = "Hotel Capacity",
  smooth = TRUE, span = 0,
  lwd = 2, cex = 0.5
)

Scatterplot of Room Rent versus Hotel Capacity for Tourist and Non-Tourist Destination

plot of chunk unnamed-chunk-15

Scatterplot of Room Rent versus Airport Distance for Tourist and Non-Tourist Destination

library(car)
# Airport (y-axis) against RoomRent (x-axis), with points grouped by
# IsTouristDestination. The y-axis label names the plotted variable; it was
# previously mislabelled "Hotel Capacity".
scatterplot(Airport ~ RoomRent | IsTouristDestination, data = AllCities.df,
            lwd = 2, smooth = TRUE, span = 0, cex = 0.5,
            xlab = "Room Rent (INR)",
            ylab = "Airport Distance")

Scatterplot of Room Rent versus Airport Distance for Tourist and Non-Tourist Destination

plot of chunk unnamed-chunk-17

Scatterplot of Room Rent versus Star Rating for Tourist and Non-Tourist Destination

library(car)
# StarRating (y-axis) against RoomRent (x-axis), with points grouped by
# IsTouristDestination. The y-axis label names the plotted variable; it was
# previously mislabelled "Hotel Capacity".
scatterplot(StarRating ~ RoomRent | IsTouristDestination, data = AllCities.df,
            lwd = 2, smooth = TRUE, span = 0, cex = 0.5,
            xlab = "Room Rent (INR)",
            ylab = "Star Rating")

Scatterplot of Room Rent versus Star Rating for Tourist and Non-Tourist Destination

plot of chunk unnamed-chunk-19

COMBINED SCATTERPLOT OF ROOM RENT, HOTEL CAPACITY, STAR RATING AND AIRPORT DISTANCE

ScatterPlot of Room Rent, Hotel Capacity, Star Rating and Airport

library(car)
# Pairwise scatterplots of the four variables in a single matrix layout.
scatterplotMatrix(
  ~ RoomRent + HotelCapacity + StarRating + Airport,
  main = "Scatter Plot Matrix",
  data = AllCities.df
)

plot of chunk unnamed-chunk-20

CORRELATION

Correlation Matrix of Room Rent, Hotel Capacity, Star Rating and Airport

# Keep only the four columns of interest:
# RoomRent, HotelCapacity, StarRating and Airport
x <- AllCities.df[c("RoomRent", "HotelCapacity", "StarRating", "Airport")]
# Pairwise correlations, stored in 'matrix'
matrix <- cor(x)
# Display the correlations rounded to 2 decimal places
round(matrix, digits = 2)
              RoomRent HotelCapacity StarRating Airport
RoomRent          1.00          0.17       0.41    0.14
HotelCapacity     0.17          1.00       0.55   -0.11
StarRating        0.41          0.55       1.00   -0.05
Airport           0.14         -0.11      -0.05    1.00

Corrgrams of Room Rent, Hotel Capacity, Star Rating and Airport

library(corrgram)
# Corrgram of the columns in x: panel.conf below the diagonal, panel.pie
# above it, and the variable names drawn by panel.txt.
corrgram(
  x,
  order = TRUE,
  upper.panel = panel.pie,
  lower.panel = panel.conf,
  text.panel = panel.txt,
  main = "Corrgram of RoomRent, HotelCapacity, StarRating and Airport"
)

plot of chunk unnamed-chunk-22