Data Analytics Assignment

Author

Chloe O Donovan

library(tidyverse)
library(ggimage)

# Read the survey responses, then store the membership tier as a factor whose
# levels follow the order NoStatus, Silver, Gold, Platinum so that tables and
# legends list the tiers in that sequence.
hotel_satisfaction <- read_csv("hotel_satisfaction.csv")
hotel_satisfaction[["eliteSegment"]] <- factor(
  hotel_satisfaction[["eliteSegment"]],
  levels = c("NoStatus", "Silver", "Gold", "Platinum")
)

Introduction

This report takes a look at customer satisfaction for the Leonardo Hotel in Galway. Here I will be looking further into: Customer Membership Levels, Customer Satisfaction, and Information on Customers' Stays.

Customer Membership Level

# Number of customers in each membership level, plus that number expressed as
# a percentage of all customers.
membership_status <- count(hotel_satisfaction, eliteSegment, name = "count")
membership_status <- mutate(
  membership_status,
  percentage = count / sum(count) * 100
)

# Pie chart: one full-width stacked bar wrapped around the y axis, with each
# slice labelled by its percentage at the middle of the slice.
ggplot(membership_status, aes(x = "", y = count, fill = eliteSegment)) +
  geom_col(width = 1) +
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "white"
  ) +
  coord_polar(theta = "y") +
  # One fill colour per factor level, in level order.
  scale_fill_manual(values = c("black", "azure2", "gold", "azure4")) +
  labs(
    title = "Customer Segmentation by Membership Level",
    fill = "Membership Status"
  ) +
  theme_void()

The pie chart above shows the different membership levels and the percentage of customers in each level. It shows that most customers have not signed up to become a member. Among the customers who are members, the majority are on the Silver level with basic rewards, which suggests that they might not stay often enough to use their membership. The Gold level has fewer customers again, which suggests the same: they do not stay often enough to use their rewards. The Platinum level has the smallest percentage of customers, and it is assumed that most members at that level come to stay for business or are very loyal to the brand.

Customer Satisfaction

# Add a combined cleanliness score per respondent: the sum of the three
# cleanliness rating columns.
hotel_satisfaction <- hotel_satisfaction %>%
  mutate(room_cleanliness = satCleanRoom + satCleanBath + satCleanCommon)

# Correlation (cor() default method, Pearson) between overall satisfaction and
# the combined score, computed on rows where both values are present.
correlation <- cor(
  hotel_satisfaction$satOverall,
  hotel_satisfaction$room_cleanliness,
  use = "complete.obs"
)

# Average the combined score within each overall-satisfaction rating.
# Plotting the raw rows with stat = "identity" would stack every respondent's
# score, so bar heights would mostly reflect how many respondents gave each
# rating rather than how clean they found the hotel.
cleanliness_by_overall <- hotel_satisfaction %>%
  group_by(satOverall) %>%
  summarise(
    mean_cleanliness = mean(room_cleanliness, na.rm = TRUE),
    .groups = "drop"
  )

# Ratings stay in their natural order on the x axis so the trend is readable.
ggplot(
  cleanliness_by_overall,
  aes(x = factor(satOverall), y = mean_cleanliness)
) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Room Cleanliness by Overall Satisfaction",
    x = "Overall Satisfaction",
    y = "Average Room Cleanliness Score"
  ) +
  theme_minimal() +
  # Print the correlation in the top-right corner of the panel.
  annotate(
    "text",
    x = Inf, y = Inf,
    label = paste("Correlation:", round(correlation, 2)),
    hjust = 1.1, vjust = 1.1, size = 5, color = "blue"
  )

The bar chart above shows overall satisfaction against the cleanliness of where the customer stayed. The cleanliness score combines how clean the room, the bathroom and the common areas were, and it shows a positive correlation and growth in ratings.

Info on Customer Stay

# Distribution of nights stayed, one panel per visit purpose.
ggplot(data = hotel_satisfaction) +
  # binwidth = 1 gives one bar per night count; the default of 30 bins splits
  # the range into fractional-width bins and leaves artificial gaps.
  # NOTE(review): assumes nightsStayed holds whole-number counts - confirm.
  geom_histogram(
    mapping = aes(x = nightsStayed, fill = visitPurpose),
    binwidth = 1
  ) +
  facet_wrap(~ visitPurpose, nrow = 2) +
  # One colour per visit purpose; six values are supplied.
  scale_fill_manual(
    values = c("navy", "pink", "maroon", "beige", "green", "yellow")
  ) +
  labs(fill = "Visit Purpose")

The graphs above show the number of nights stayed for each visit purpose. Sports events had the most nights stayed, with the longest stay being 20 nights, and concerts had the fewest nights stayed.

Customer Travel and satisfaction with prices

# One-row summary: mean distance traveled and mean satisfaction with parking
# price and with closeness to transport. Missing values are skipped, matching
# the other summaries in this report, so one NA cannot turn a mean into NA.
summary_table <- hotel_satisfaction %>%
  summarize(
    Avg_Distance = mean(distanceTraveled, na.rm = TRUE),
    Avg_SatParkingPrice = mean(satParkingPrice, na.rm = TRUE),
    Avg_SatCloseTransp = mean(satCloseTransp, na.rm = TRUE)
  )

print(summary_table)

# A tibble: 1 × 3
  Avg_Distance Avg_SatParkingPrice Avg_SatCloseTransp
         <dbl>               <dbl>              <dbl>
1         785.                4.28               4.07

# Scatter plot of distance traveled against the parking-price rating, with
# points coloured by the transport rating treated as a category.
ggplot(
  hotel_satisfaction,
  aes(
    x = distanceTraveled,
    y = satParkingPrice,
    color = factor(satCloseTransp)
  )
) +
  geom_point() +
  theme_minimal() +
  labs(
    title = "Distance Traveled vs Satisfaction with Parking Price",
    x = "Distance Traveled",
    y = "Satisfaction with Parking Price",
    color = "Satisfaction with Close Transport"
  )

# Mean distance traveled for every combination of parking-price rating and
# transport rating. `.groups = "drop"` returns an ungrouped result (and
# silences the grouping message) so later verbs are not applied per group by
# accident; `na.rm = TRUE` keeps a missing distance from blanking a cell.
heatmap_distance <- hotel_satisfaction %>%
  group_by(satParkingPrice, satCloseTransp) %>%
  summarize(
    Avg_Distance = mean(distanceTraveled, na.rm = TRUE),
    .groups = "drop"
  )

# Heatmap: one tile per rating combination, darker blue = larger mean distance.
ggplot(
  heatmap_distance,
  aes(
    x = factor(satParkingPrice),
    y = factor(satCloseTransp),
    fill = Avg_Distance
  )
) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  labs(
    x = "Satisfaction with Parking Price",
    y = "Satisfaction with Close Transport",
    fill = "Avg. Distance Traveled"
  ) +
  ggtitle("Average Distance Traveled by Satisfaction Levels") +
  theme_minimal()

The graph above shows that most customers rated the parking prices low, especially given the distances they had traveled; however, the customer who traveled the farthest, over 4000 km, gave the parking prices a rating of 6.

Satisfaction of Staff

# Add a combined staff score per respondent: the sum of the four staff rating
# columns.
hotel_satisfaction <- hotel_satisfaction %>%
  mutate(
    satisfaction_of_staff =
      satFrontStaff + satDiningStaff + satValetStaff + satHouseStaff
  )

# Correlation (cor() default method, Pearson) between overall satisfaction and
# the combined staff score, computed on rows where both values are present.
correlation <- cor(
  hotel_satisfaction$satOverall,
  hotel_satisfaction$satisfaction_of_staff,
  use = "complete.obs"
)

# Average the staff score within each overall-satisfaction rating. Plotting
# the raw rows with stat = "identity" would stack every respondent's score,
# so bar heights would mostly reflect how many respondents gave each rating.
staff_by_overall <- hotel_satisfaction %>%
  group_by(satOverall) %>%
  summarise(
    mean_staff = mean(satisfaction_of_staff, na.rm = TRUE),
    .groups = "drop"
  )

# Ratings stay in their natural order on the x axis so the trend is readable.
ggplot(staff_by_overall, aes(x = factor(satOverall), y = mean_staff)) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Staff Satisfaction by Overall Satisfaction",
    x = "Overall Satisfaction",
    y = "Average Staff Satisfaction"
  ) +
  theme_minimal() +
  # Print the correlation in the top-right corner of the panel.
  annotate(
    "text",
    x = Inf, y = Inf,
    label = paste("Correlation:", round(correlation, 2)),
    hjust = 1.1, vjust = 1.1, size = 5, color = "red"
  )

# Combined staff score per respondent: the sum of the four staff rating
# columns, stored as `satisfaction_of_staff`.
hotel_satisfaction <- mutate(
  hotel_satisfaction,
  satisfaction_of_staff =
    satFrontStaff + satDiningStaff + satValetStaff + satHouseStaff
)

# Correlation between overall satisfaction and the combined staff score,
# using only rows where both values are present.
correlation <- with(
  hotel_satisfaction,
  cor(satOverall, satisfaction_of_staff, use = "complete.obs")
)

# Scatter plot of the combined staff score against overall satisfaction, with
# the correlation printed in the top-right corner of the panel.
ggplot(hotel_satisfaction, aes(x = satOverall, y = satisfaction_of_staff)) +
  geom_point(color = "skyblue", size = 2) +
  annotate(
    "text",
    x = Inf,
    y = Inf,
    label = paste("Correlation:", round(correlation, 2)),
    hjust = 1.1,
    vjust = 1.1,
    size = 5,
    color = "red"
  ) +
  theme_minimal() +
  labs(
    title = "Staff Satisfaction by Overall Satisfaction",
    x = "Overall Satisfaction",
    y = "Staff Satisfaction"
  )

These graphs analyze staff satisfaction against the overall ratings customers gave, showing the low ratings for staff satisfaction.

Loyalty & Recognition

# Count respondents for each (membership level, recognition rating) pair, then
# convert each count to a percentage of its membership level.
# `.groups = "drop_last"` states explicitly that the result stays grouped by
# eliteSegment for the mutate(), so sum(count) is the per-level total; the
# final ungroup() stops that grouping from leaking into later code.
hotel_satisfaction_summary <- hotel_satisfaction %>%
  group_by(eliteSegment, satRecognition) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(proportion = count / sum(count) * 100) %>%
  ungroup()

# Heatmap: one tile per pair, shaded and labelled with the within-level
# percentage.
ggplot(
  hotel_satisfaction_summary,
  aes(x = eliteSegment, y = factor(satRecognition), fill = proportion)
) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  labs(
    x = "Elite Segment",
    y = "Recognition Satisfaction",
    fill = "Percentage",
    title = "Heatmap of Recognition Satisfaction by Elite Segment"
  ) +
  theme_minimal() +
  geom_text(aes(label = paste0(round(proportion, 1), "%")), color = "black")

The graph above shows the recognition ratings given by customers in the different membership levels. The dark blue areas are the ratings most often given by customers in each membership level. This shows that customers on the Platinum level gave the highest recognition ratings, as they most likely got more rewards compared to customers with no status, who were not signed up to receive rewards and gave lower ratings.

Satisfaction by Visit Purpose

# Violin plot of the overall satisfaction rating for each visit purpose, with
# each violin filled by its visit purpose.
ggplot(hotel_satisfaction) +
  geom_violin(aes(x = visitPurpose, y = satOverall, fill = visitPurpose)) +
  theme_minimal() +
  labs(
    title = "Overall Satisfaction by Visit Purpose",
    x = "Visit Purpose",
    y = "Overall Satisfaction"
  )

The violin graph above shows the overall satisfaction rating customers gave their stay by visit purpose. As you can see, customers who came on a business stay gave a higher rating. Other or mixed gave the most consistent rating.

Spending Habits

# Display labels for the three per-night spending columns.
spend_labels <- c(
  "avgRoomSpendPerNight" = "Room per Night",
  "avgFoodSpendPerNight" = "Food per Night",
  "avgWifiSpendPerNight" = "Wifi per Night"
)

# Mean of each spending column (missing values skipped), reshaped to one row
# per category with a readable category label.
Average_spend <- hotel_satisfaction %>%
  summarise(
    across(
      c(avgRoomSpendPerNight, avgFoodSpendPerNight, avgWifiSpendPerNight),
      ~ mean(.x, na.rm = TRUE)
    )
  ) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Category",
    values_to = "SpendingHabits"
  ) %>%
  mutate(Category = recode(Category, !!!spend_labels))

# Bar chart of the average spend per category, with the euro amount printed
# just above each bar.
ggplot(Average_spend, aes(x = Category, y = SpendingHabits, fill = Category)) +
  geom_col() +
  geom_text(
    aes(label = paste0("€", round(SpendingHabits, 2))),
    vjust = -0.5,
    color = "black"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  labs(
    title = "Average Spending Per Night by Category",
    x = "Category",
    y = "Average Amount Spent"
  )

Overall Satisfaction against spends per night

# Overall satisfaction against average room spend per night: semi-transparent
# points with a fitted linear-model line drawn in red.
ggplot(hotel_satisfaction, aes(x = avgRoomSpendPerNight, y = satOverall)) +
  geom_point(color = "blue", alpha = 0.6) +
  geom_smooth(method = "lm", color = "red") +
  ggtitle("Relationship Between Room Spending and Overall Satisfaction") +
  xlab("Average Room Spend Per Night") +
  ylab("Overall Satisfaction") +
  theme_minimal()

The scatter plot above shows that the cheaper the room was per night, the higher the rating the customer gave. As you can see, the more they paid for the room, the less satisfied the customer was.

# Overall satisfaction against average food spend per night: semi-transparent
# points with a fitted linear-model line drawn in red.
ggplot(hotel_satisfaction, aes(x = avgFoodSpendPerNight, y = satOverall)) +
  geom_point(color = "blue", alpha = 0.6) +
  geom_smooth(method = "lm", color = "red") +
  ggtitle("Relationship Between Food Spending and Overall Satisfaction") +
  xlab("Average Food Spend Per Night") +
  ylab("Overall Satisfaction") +
  theme_minimal()

The scatter plot above shows customers' overall satisfaction with their stay against how much they spent on food per night on average, showing a steady overall rating regardless of the amount they paid.

# Overall satisfaction against average wifi spend per night: semi-transparent
# points with a fitted linear-model line drawn in red.
ggplot(hotel_satisfaction, aes(x = avgWifiSpendPerNight, y = satOverall)) +
  geom_point(color = "blue", alpha = 0.6) +
  geom_smooth(method = "lm", color = "red") +
  ggtitle("Relationship Between Wifi Spending and Overall Satisfaction") +
  xlab("Average Wifi Spend Per Night") +
  ylab("Overall Satisfaction") +
  theme_minimal()

The scatter plot above shows the average wifi spend per night, with customers giving a rating of 4 for overall satisfaction across their spend on wifi.

Visit Purpose with the Distance Travelled

# Mean distance traveled per visit purpose (missing values skipped), sorted
# from the largest mean to the smallest.
# NOTE(review): the column is named `avg_monthly_charge` but holds the mean of
# `distanceTraveled`; the name is kept so the shape of `table_1` is unchanged.
table_1 <- group_by(hotel_satisfaction, visitPurpose)
table_1 <- summarise(
  table_1,
  avg_monthly_charge = mean(distanceTraveled, na.rm = TRUE)
)
table_1 <- arrange(table_1, desc(avg_monthly_charge))

# Render the table: purpose left-aligned, distance right-aligned and shown
# to two decimal places.
table_1 %>%
  knitr::kable(
    digits = c(0, 2),
    align = "lr",
    col.names = c("Visit Purpose", "Distance Travelled"),
    caption = "Distance Travelled and Visit Purpose",
    # Attribute asking Quarto not to post-process this table.
    table.attr = 'data-quarto-disable-processing = "true"'
  )

Distance Travelled and Visit Purpose
Visit Purpose	Distance Travelled
SportsEvent	884.44
Business	825.19
OtherOrMixed	748.18
Conference	738.76
Vacation	719.39
Concert	668.16

The table above shows how far, on average, customers traveled to stay in the hotel for each visit purpose. From this we can tell that customers coming for sports events traveled the farthest on average, followed by those on business stays. At the bottom we see that customers on vacation and those attending concerts traveled the shortest distances on average.