Better uderstandig when people canceled their reservations is very important for owners, menagers, CEO’s and many others people. I’ve tryed to make some questions why? Dataset: https://app.datacamp.com/workspace/external-link?url=https%3A%2F%2Fwww.kaggle.com%2Fdatasets%2Fahsan81%2Fhotel-reservations-classification-dataset
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
reserv<-read.csv("/cloud/project/Hotel Reservations.csv")
head(reserv,5)
## Booking_ID no_of_adults no_of_children no_of_weekend_nights no_of_week_nights
## 1 INN00001 2 0 1 2
## 2 INN00002 2 0 2 3
## 3 INN00003 1 0 2 1
## 4 INN00004 2 0 0 2
## 5 INN00005 2 0 1 1
## type_of_meal_plan required_car_parking_space room_type_reserved lead_time
## 1 Meal Plan 1 0 Room_Type 1 224
## 2 Not Selected 0 Room_Type 1 5
## 3 Meal Plan 1 0 Room_Type 1 1
## 4 Meal Plan 1 0 Room_Type 1 211
## 5 Not Selected 0 Room_Type 1 48
## arrival_year arrival_month arrival_date market_segment_type repeated_guest
## 1 2017 10 2 Offline 0
## 2 2018 11 6 Online 0
## 3 2018 2 28 Online 0
## 4 2018 5 20 Online 0
## 5 2018 4 11 Online 0
## no_of_previous_cancellations no_of_previous_bookings_not_canceled
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## avg_price_per_room no_of_special_requests booking_status
## 1 65,00 0 Not_Canceled
## 2 106,68 1 Not_Canceled
## 3 60,00 0 Canceled
## 4 100,00 0 Canceled
## 5 94,50 0 Canceled
glimpse(reserv)
## Rows: 36,275
## Columns: 19
## $ Booking_ID <chr> "INN00001", "INN00002", "INN00003…
## $ no_of_adults <int> 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 1, …
## $ no_of_children <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ no_of_weekend_nights <int> 1, 2, 2, 0, 1, 0, 1, 1, 0, 0, 1, …
## $ no_of_week_nights <int> 2, 3, 1, 2, 1, 2, 3, 3, 4, 5, 0, …
## $ type_of_meal_plan <chr> "Meal Plan 1", "Not Selected", "M…
## $ required_car_parking_space <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ room_type_reserved <chr> "Room_Type 1", "Room_Type 1", "Ro…
## $ lead_time <int> 224, 5, 1, 211, 48, 346, 34, 83, …
## $ arrival_year <int> 2017, 2018, 2018, 2018, 2018, 201…
## $ arrival_month <int> 10, 11, 2, 5, 4, 9, 10, 12, 7, 10…
## $ arrival_date <int> 2, 6, 28, 20, 11, 13, 15, 26, 6, …
## $ market_segment_type <chr> "Offline", "Online", "Online", "O…
## $ repeated_guest <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ no_of_previous_cancellations <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ no_of_previous_bookings_not_canceled <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ avg_price_per_room <chr> "65,00", "106,68", "60,00", "100,…
## $ no_of_special_requests <int> 0, 1, 0, 0, 0, 1, 1, 1, 1, 3, 0, …
## $ booking_status <chr> "Not_Canceled", "Not_Canceled", "…
summary(reserv)
## Booking_ID no_of_adults no_of_children no_of_weekend_nights
## Length:36275 Min. :0.000 Min. : 0.0000 Min. :0.0000
## Class :character 1st Qu.:2.000 1st Qu.: 0.0000 1st Qu.:0.0000
## Mode :character Median :2.000 Median : 0.0000 Median :1.0000
## Mean :1.845 Mean : 0.1053 Mean :0.8107
## 3rd Qu.:2.000 3rd Qu.: 0.0000 3rd Qu.:2.0000
## Max. :4.000 Max. :10.0000 Max. :7.0000
## no_of_week_nights type_of_meal_plan required_car_parking_space
## Min. : 0.000 Length:36275 Min. :0.00000
## 1st Qu.: 1.000 Class :character 1st Qu.:0.00000
## Median : 2.000 Mode :character Median :0.00000
## Mean : 2.204 Mean :0.03099
## 3rd Qu.: 3.000 3rd Qu.:0.00000
## Max. :17.000 Max. :1.00000
## room_type_reserved lead_time arrival_year arrival_month
## Length:36275 Min. : 0.00 Min. :2017 Min. : 1.000
## Class :character 1st Qu.: 17.00 1st Qu.:2018 1st Qu.: 5.000
## Mode :character Median : 57.00 Median :2018 Median : 8.000
## Mean : 85.23 Mean :2018 Mean : 7.424
## 3rd Qu.:126.00 3rd Qu.:2018 3rd Qu.:10.000
## Max. :443.00 Max. :2018 Max. :12.000
## arrival_date market_segment_type repeated_guest
## Min. : 1.0 Length:36275 Min. :0.00000
## 1st Qu.: 8.0 Class :character 1st Qu.:0.00000
## Median :16.0 Mode :character Median :0.00000
## Mean :15.6 Mean :0.02564
## 3rd Qu.:23.0 3rd Qu.:0.00000
## Max. :31.0 Max. :1.00000
## no_of_previous_cancellations no_of_previous_bookings_not_canceled
## Min. : 0.00000 Min. : 0.0000
## 1st Qu.: 0.00000 1st Qu.: 0.0000
## Median : 0.00000 Median : 0.0000
## Mean : 0.02335 Mean : 0.1534
## 3rd Qu.: 0.00000 3rd Qu.: 0.0000
## Max. :13.00000 Max. :58.0000
## avg_price_per_room no_of_special_requests booking_status
## Length:36275 Min. :0.0000 Length:36275
## Class :character 1st Qu.:0.0000 Class :character
## Mode :character Median :0.0000 Mode :character
## Mean :0.6197
## 3rd Qu.:1.0000
## Max. :5.0000
reserv_comleet_data<-reserv
#Analysis ## Group by market type
reserve_market_type <- reserv_comleet_data %>% group_by(market_segment_type) %>%
count(market_segment_type) %>% rename(number=n)
reserv_market_type_booking_status<-reserv_comleet_data %>% group_by(booking_status) %>%
count(booking_status)
reserv_market_type_canceled_meal_plan<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled") %>%
group_by(type_of_meal_plan) %>%
count(type_of_meal_plan) %>%
arrange(desc(n))
reserv_market_type_canceled_room_type<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled") %>%
group_by(room_type_reserved) %>%
count(room_type_reserved) %>%
arrange(desc(n))
reserv_market_type_canceled_date_year<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled") %>%
group_by(arrival_year) %>%
count(arrival_year) %>%
arrange(desc(n))
reserv_market_type_canceled_date_yaer_month<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
group_by(arrival_month) %>%
count(arrival_month)
reserv_market_type_canceled_week_nights<-reserv_comleet_data %>% filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
group_by(no_of_week_nights) %>%
count(no_of_week_nights) %>%
arrange(desc(n)) %>%
rename(Number_week_nights=n)
reserv_market_type_canceled_weekend_nights<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
group_by(no_of_weekend_nights) %>%
count(no_of_weekend_nights) %>%
arrange(desc(n)) %>%
rename(Number_weekend_nights=n)
## Children and adults ### Children and weekend nights
reserv_market_type_canceled_children<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
filter(no_of_weekend_nights==0|no_of_weekend_nights==1|no_of_weekend_nights==2) %>%
group_by(no_of_children) %>%
count(no_of_children) %>%
rename(number_of_children=n)
reserv_market_type_canceled_children_sum<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
filter(no_of_weekend_nights==0|no_of_weekend_nights==1|no_of_weekend_nights==2) %>%
count(no_of_children) %>%
rename(number_of_children=n)
head(reserv_market_type_canceled_children,4)
## # A tibble: 4 × 2
## # Groups: no_of_children [4]
## no_of_children number_of_children
## <int> <int>
## 1 0 6873
## 2 1 507
## 3 2 427
## 4 3 5
head(reserv_market_type_canceled_children_sum,4)
## no_of_children number_of_children
## 1 0 6873
## 2 1 507
## 3 2 427
## 4 3 5
reserv_market_type_canceled_adults<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
group_by(no_of_adults) %>%
count(no_of_adults) %>%
rename(no_of_adults_canc=n)
reserv_market_type_canceled_adults_week_nights<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
filter(no_of_adults==2) %>%
group_by(no_of_week_nights) %>%
count(no_of_week_nights)
## Meal plan 2
#### cose meal plan -----------------------------------------------------
reserv_market_type_canceled_adults_week_nights_meal<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
filter(no_of_adults==2) %>%
group_by(type_of_meal_plan) %>%
count(type_of_meal_plan)
## Room type 2
reserv_market_type_canceled_adults_week_nights_meal_room<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>%
group_by(room_type_reserved) %>%
count(room_type_reserved)
## Car, leed time and other conditions
reserv_market_type_canceled_adults_week_nights_meal_room_car<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>%
group_by(required_car_parking_space) %>%
count(required_car_parking_space)
reserv_market_type_canceled_adults_week_nights_meal_room_car_repeated<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>%
filter(required_car_parking_space==0) %>%
group_by(repeated_guest) %>%
count(repeated_guest)
adult_canceled_previous_booking_special<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>%
filter(required_car_parking_space==0) %>%
filter(repeated_guest==0&no_of_previous_cancellations==0) %>%
filter(no_of_previous_bookings_not_canceled==0) %>%
group_by(no_of_special_requests) %>%
count(no_of_special_requests)
cancelation_leed_mediana<-reserv_comleet_data %>%
filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>%
filter(required_car_parking_space==0) %>%
filter(repeated_guest==0&no_of_previous_cancellations==0) %>%
filter(no_of_previous_bookings_not_canceled==0) %>%
mutate(diff_lead_time=lead_time-median(lead_time)) %>%
count(lead_time>=median(lead_time))
## Graph ----------------------------------------------------------
barplot(rowSums(cancelation_leed_mediana),
col=c("aquamarine3", "coral"),
names.arg=LETTERS[1:2])
title(main="Canceled reservations 2017-2018",
cex.main = 2, font.main= 4, col.main= "blue",
cex.sub = 0.75, font.sub = 3, col.sub = "red",
xlab='Lead time more or less then mediana',
ylab='Number of cancelations')
## Add text coordinates
text(1.9, 100, "Less - 2097 pers.")
text(0.7, 100, "More - 2104 pers.")