Loading the Hotel Reviews data from Kaggle
# Read the raw reviews from the CSV file in the working directory, leaving
# text columns as character vectors instead of converting them to factors.
reviews <- read.csv(
  file = "Hotel_Reviews.csv",
  stringsAsFactors = FALSE
)
Adding the new Num_nights column to the dataset
# Append Num_nights to the reviews data frame as an extra column.
# NOTE(review): Num_nights is created outside the code visible here; this
# presumably expects one value per row of `reviews` -- confirm.
# check.names = FALSE leaves the existing column names untouched, which is what
# cbind() does for data frames.
reviews <- data.frame(reviews, Num_nights, check.names = FALSE)
Removing the rows where Num_nights is NA
library(dplyr)
# Keep only the reviews that have a Num_nights value.
# is.na() tests for missing values directly. The earlier comparison against the
# string "NA" only removed them because filter() drops rows whose condition
# evaluates to NA.
# NOTE(review): assumes Num_nights holds real NA values rather than the literal
# text "NA" -- confirm against the code that builds Num_nights.
stay <- reviews %>%
  filter(!is.na(Num_nights))
Converting the reviewers’ nationality into a factor
# Store the nationality column as a factor so it is treated as a categorical
# variable in the grouping and plotting steps that follow.
stay[["Reviewer_Nationality"]] <- as.factor(stay[["Reviewer_Nationality"]])
Plotting the average length of stay for the top 15 nationalities
library(ggplot2)
# Bar chart of the mean Num_nights per reviewer nationality, limited to the
# 15 nationalities with the highest averages (top_n() keeps ties).
stay %>%
  group_by(Reviewer_Nationality) %>%
  summarise(avg = mean(Num_nights)) %>%
  # Name the ranking column explicitly. Without it top_n() ranks by whichever
  # column happens to be last and prints a "Selecting by avg" message.
  top_n(15, avg) %>%
  ggplot(aes(
    # Order the bars from the highest to the lowest average.
    x = reorder(Reviewer_Nationality, -avg),
    y = avg,
    fill = Reviewer_Nationality
  )) +
  # geom_col() draws bar heights from the y values, i.e. it is the same as
  # geom_bar(stat = "identity").
  geom_col() +
  labs(
    title = "Length of Stay by Nationality",
    x = "Nationality",
    y = "Average length of stay"
  ) +
  # Rotate the nationality labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90))
## Selecting by avg
