Start R Markdown

Better uderstandig when people canceled their reservations is very important for owners, menagers, CEO’s and many others people. I’ve tryed to make some questions why? Dataset: https://app.datacamp.com/workspace/external-link?url=https%3A%2F%2Fwww.kaggle.com%2Fdatasets%2Fahsan81%2Fhotel-reservations-classification-dataset

install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
reserv<-read.csv("/cloud/project/Hotel Reservations.csv")
head(reserv,5)
##   Booking_ID no_of_adults no_of_children no_of_weekend_nights no_of_week_nights
## 1   INN00001            2              0                    1                 2
## 2   INN00002            2              0                    2                 3
## 3   INN00003            1              0                    2                 1
## 4   INN00004            2              0                    0                 2
## 5   INN00005            2              0                    1                 1
##   type_of_meal_plan required_car_parking_space room_type_reserved lead_time
## 1       Meal Plan 1                          0        Room_Type 1       224
## 2      Not Selected                          0        Room_Type 1         5
## 3       Meal Plan 1                          0        Room_Type 1         1
## 4       Meal Plan 1                          0        Room_Type 1       211
## 5      Not Selected                          0        Room_Type 1        48
##   arrival_year arrival_month arrival_date market_segment_type repeated_guest
## 1         2017            10            2             Offline              0
## 2         2018            11            6              Online              0
## 3         2018             2           28              Online              0
## 4         2018             5           20              Online              0
## 5         2018             4           11              Online              0
##   no_of_previous_cancellations no_of_previous_bookings_not_canceled
## 1                            0                                    0
## 2                            0                                    0
## 3                            0                                    0
## 4                            0                                    0
## 5                            0                                    0
##   avg_price_per_room no_of_special_requests booking_status
## 1              65,00                      0   Not_Canceled
## 2             106,68                      1   Not_Canceled
## 3              60,00                      0       Canceled
## 4             100,00                      0       Canceled
## 5              94,50                      0       Canceled
glimpse(reserv)
## Rows: 36,275
## Columns: 19
## $ Booking_ID                           <chr> "INN00001", "INN00002", "INN00003…
## $ no_of_adults                         <int> 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 1, …
## $ no_of_children                       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ no_of_weekend_nights                 <int> 1, 2, 2, 0, 1, 0, 1, 1, 0, 0, 1, …
## $ no_of_week_nights                    <int> 2, 3, 1, 2, 1, 2, 3, 3, 4, 5, 0, …
## $ type_of_meal_plan                    <chr> "Meal Plan 1", "Not Selected", "M…
## $ required_car_parking_space           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ room_type_reserved                   <chr> "Room_Type 1", "Room_Type 1", "Ro…
## $ lead_time                            <int> 224, 5, 1, 211, 48, 346, 34, 83, …
## $ arrival_year                         <int> 2017, 2018, 2018, 2018, 2018, 201…
## $ arrival_month                        <int> 10, 11, 2, 5, 4, 9, 10, 12, 7, 10…
## $ arrival_date                         <int> 2, 6, 28, 20, 11, 13, 15, 26, 6, …
## $ market_segment_type                  <chr> "Offline", "Online", "Online", "O…
## $ repeated_guest                       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ no_of_previous_cancellations         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ no_of_previous_bookings_not_canceled <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ avg_price_per_room                   <chr> "65,00", "106,68", "60,00", "100,…
## $ no_of_special_requests               <int> 0, 1, 0, 0, 0, 1, 1, 1, 1, 3, 0, …
## $ booking_status                       <chr> "Not_Canceled", "Not_Canceled", "…
summary(reserv)
##   Booking_ID         no_of_adults   no_of_children    no_of_weekend_nights
##  Length:36275       Min.   :0.000   Min.   : 0.0000   Min.   :0.0000      
##  Class :character   1st Qu.:2.000   1st Qu.: 0.0000   1st Qu.:0.0000      
##  Mode  :character   Median :2.000   Median : 0.0000   Median :1.0000      
##                     Mean   :1.845   Mean   : 0.1053   Mean   :0.8107      
##                     3rd Qu.:2.000   3rd Qu.: 0.0000   3rd Qu.:2.0000      
##                     Max.   :4.000   Max.   :10.0000   Max.   :7.0000      
##  no_of_week_nights type_of_meal_plan  required_car_parking_space
##  Min.   : 0.000    Length:36275       Min.   :0.00000           
##  1st Qu.: 1.000    Class :character   1st Qu.:0.00000           
##  Median : 2.000    Mode  :character   Median :0.00000           
##  Mean   : 2.204                       Mean   :0.03099           
##  3rd Qu.: 3.000                       3rd Qu.:0.00000           
##  Max.   :17.000                       Max.   :1.00000           
##  room_type_reserved   lead_time       arrival_year  arrival_month   
##  Length:36275       Min.   :  0.00   Min.   :2017   Min.   : 1.000  
##  Class :character   1st Qu.: 17.00   1st Qu.:2018   1st Qu.: 5.000  
##  Mode  :character   Median : 57.00   Median :2018   Median : 8.000  
##                     Mean   : 85.23   Mean   :2018   Mean   : 7.424  
##                     3rd Qu.:126.00   3rd Qu.:2018   3rd Qu.:10.000  
##                     Max.   :443.00   Max.   :2018   Max.   :12.000  
##   arrival_date  market_segment_type repeated_guest   
##  Min.   : 1.0   Length:36275        Min.   :0.00000  
##  1st Qu.: 8.0   Class :character    1st Qu.:0.00000  
##  Median :16.0   Mode  :character    Median :0.00000  
##  Mean   :15.6                       Mean   :0.02564  
##  3rd Qu.:23.0                       3rd Qu.:0.00000  
##  Max.   :31.0                       Max.   :1.00000  
##  no_of_previous_cancellations no_of_previous_bookings_not_canceled
##  Min.   : 0.00000             Min.   : 0.0000                     
##  1st Qu.: 0.00000             1st Qu.: 0.0000                     
##  Median : 0.00000             Median : 0.0000                     
##  Mean   : 0.02335             Mean   : 0.1534                     
##  3rd Qu.: 0.00000             3rd Qu.: 0.0000                     
##  Max.   :13.00000             Max.   :58.0000                     
##  avg_price_per_room no_of_special_requests booking_status    
##  Length:36275       Min.   :0.0000         Length:36275      
##  Class :character   1st Qu.:0.0000         Class :character  
##  Mode  :character   Median :0.0000         Mode  :character  
##                     Mean   :0.6197                           
##                     3rd Qu.:1.0000                           
##                     Max.   :5.0000

Working dataset

reserv_comleet_data<-reserv

#Analysis ## Group by market type

reserve_market_type <- reserv_comleet_data %>% group_by(market_segment_type) %>% 
count(market_segment_type) %>% rename(number=n)

Market type canceled vs non canceled

reserv_market_type_booking_status<-reserv_comleet_data %>% group_by(booking_status) %>% 
count(booking_status)

Observations: market type = ‘Online’ and booking status=‘Canceled’

Meal plan

reserv_market_type_canceled_meal_plan<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled") %>% 
  group_by(type_of_meal_plan) %>% 
  count(type_of_meal_plan) %>% 
  arrange(desc(n))

Room type

reserv_market_type_canceled_room_type<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled") %>% 
  group_by(room_type_reserved) %>% 
  count(room_type_reserved) %>% 
  arrange(desc(n))

Date

Year

reserv_market_type_canceled_date_year<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled") %>% 
  group_by(arrival_year) %>% 
  count(arrival_year) %>% 
  arrange(desc(n))

Year/month

reserv_market_type_canceled_date_yaer_month<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>% 
  group_by(arrival_month) %>% 
  count(arrival_month)

Number of nights 2018

No of week nights

reserv_market_type_canceled_week_nights<-reserv_comleet_data %>%  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>% 
  group_by(no_of_week_nights) %>% 
  count(no_of_week_nights) %>% 
  arrange(desc(n)) %>% 
  rename(Number_week_nights=n)

No of weekend nights

reserv_market_type_canceled_weekend_nights<-reserv_comleet_data %>%  
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>% 
  group_by(no_of_weekend_nights) %>% 
  count(no_of_weekend_nights) %>% 
  arrange(desc(n)) %>% 
  rename(Number_weekend_nights=n)

## Children and adults ### Children and weekend nights

reserv_market_type_canceled_children<-reserv_comleet_data %>%  
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>% 
  filter(no_of_weekend_nights==0|no_of_weekend_nights==1|no_of_weekend_nights==2) %>% 
  group_by(no_of_children) %>% 
  count(no_of_children) %>% 
  rename(number_of_children=n) 
reserv_market_type_canceled_children_sum<-reserv_comleet_data %>%  
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>% 
  filter(no_of_weekend_nights==0|no_of_weekend_nights==1|no_of_weekend_nights==2) %>% 
  count(no_of_children) %>% 
  rename(number_of_children=n) 
head(reserv_market_type_canceled_children,4)
## # A tibble: 4 × 2
## # Groups:   no_of_children [4]
##   no_of_children number_of_children
##            <int>              <int>
## 1              0               6873
## 2              1                507
## 3              2                427
## 4              3                  5
head(reserv_market_type_canceled_children_sum,4)
##   no_of_children number_of_children
## 1              0               6873
## 2              1                507
## 3              2                427
## 4              3                  5

Adults

reserv_market_type_canceled_adults<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
  group_by(no_of_adults) %>% 
  count(no_of_adults) %>% 
  rename(no_of_adults_canc=n)

Graph ————————————————————————————–

reserv_market_type_canceled_adults_week_nights<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
  filter(no_of_adults==2) %>% 
  group_by(no_of_week_nights) %>% 
  count(no_of_week_nights)

Graph ————————————————————————————–

## Meal plan 2

#### cose meal plan -----------------------------------------------------
reserv_market_type_canceled_adults_week_nights_meal<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
  filter(no_of_adults==2) %>% 
  group_by(type_of_meal_plan) %>% 
  count(type_of_meal_plan)

## Room type 2

reserv_market_type_canceled_adults_week_nights_meal_room<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
  filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>% 
  group_by(room_type_reserved) %>% 
  count(room_type_reserved)

## Car, leed time and other conditions

reserv_market_type_canceled_adults_week_nights_meal_room_car<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
  filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>% 
  group_by(required_car_parking_space) %>% 
  count(required_car_parking_space)
reserv_market_type_canceled_adults_week_nights_meal_room_car_repeated<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
  filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>% 
  filter(required_car_parking_space==0) %>% 
  group_by(repeated_guest) %>% 
  count(repeated_guest)

adult_canceled_previous_booking_special<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
  filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>% 
  filter(required_car_parking_space==0) %>% 
  filter(repeated_guest==0&no_of_previous_cancellations==0) %>% 
  filter(no_of_previous_bookings_not_canceled==0) %>% 
  group_by(no_of_special_requests) %>% 
  count(no_of_special_requests)

cancelation_leed_mediana<-reserv_comleet_data %>% 
  filter(market_segment_type=='Online' & booking_status=="Canceled" & arrival_year=='2018') %>%
  filter(no_of_adults==2&type_of_meal_plan=='Meal Plan 1') %>% 
  filter(required_car_parking_space==0) %>% 
  filter(repeated_guest==0&no_of_previous_cancellations==0) %>% 
  filter(no_of_previous_bookings_not_canceled==0) %>% 
  mutate(diff_lead_time=lead_time-median(lead_time)) %>% 
  count(lead_time>=median(lead_time))
## Graph ----------------------------------------------------------
barplot(rowSums(cancelation_leed_mediana),
        col=c("aquamarine3", "coral"), 
        names.arg=LETTERS[1:2]) 
      title(main="Canceled reservations 2017-2018", 
      cex.main = 2,   font.main= 4, col.main= "blue",
      cex.sub = 0.75, font.sub = 3, col.sub = "red",
      xlab='Lead time more or less then mediana',
      ylab='Number of cancelations')
## Add text coordinates
      text(1.9, 100, "Less - 2097 pers.")
      text(0.7, 100, "More - 2104 pers.")