Please check homework2. This analysis is based on a dataset that contains hotel bookings from 2015-2017. The question addressed is “Find the customer with the most waiting days on the list”.
#Set the knitr chunk option echo = TRUE as the default for all chunks
knitr::opts_chunk$set(echo = TRUE)
Distill is a publication format for scientific and technical writing, native to the web.
Learn more about using Distill for R Markdown at https://rstudio.github.io/distill.
#Preview the first rows of the hotels data
head(hotels)
# A tibble: 6 × 32
hotel is_canceled lead_time arrival_date_ye… arrival_date_mo…
<chr> <dbl> <dbl> <dbl> <chr>
1 Resort Hotel 0 342 2015 July
2 Resort Hotel 0 737 2015 July
3 Resort Hotel 0 7 2015 July
4 Resort Hotel 0 13 2015 July
5 Resort Hotel 0 14 2015 July
6 Resort Hotel 0 14 2015 July
# … with 27 more variables: arrival_date_week_number <dbl>,
# arrival_date_day_of_month <dbl>, stays_in_weekend_nights <dbl>,
# stays_in_week_nights <dbl>, adults <dbl>, children <dbl>,
# babies <dbl>, meal <chr>, country <chr>, market_segment <chr>,
# distribution_channel <chr>, is_repeated_guest <dbl>,
# previous_cancellations <dbl>,
# previous_bookings_not_canceled <dbl>, reserved_room_type <chr>, …
#List every column name in the hotels data
colnames(hotels)
[1] "hotel" "is_canceled"
[3] "lead_time" "arrival_date_year"
[5] "arrival_date_month" "arrival_date_week_number"
[7] "arrival_date_day_of_month" "stays_in_weekend_nights"
[9] "stays_in_week_nights" "adults"
[11] "children" "babies"
[13] "meal" "country"
[15] "market_segment" "distribution_channel"
[17] "is_repeated_guest" "previous_cancellations"
[19] "previous_bookings_not_canceled" "reserved_room_type"
[21] "assigned_room_type" "booking_changes"
[23] "deposit_type" "agent"
[25] "company" "days_in_waiting_list"
[27] "customer_type" "adr"
[29] "required_car_parking_spaces" "total_of_special_requests"
[31] "reservation_status" "reservation_status_date"
#As you may see with colnames(), we have listed all variables
#Variable type classification:
#String/char type
#<chr>: hotel, arrival_date_month, meal, country, market_segment, distribution_channel, reserved_room_type, assigned_room_type, deposit_type, agent, company, customer_type, reservation_status
#Numeric type
#<dbl>: is_canceled, lead_time, arrival_date_year, arrival_date_week_number, arrival_date_day_of_month, stays_in_weekend_nights, stays_in_week_nights, adults, children, babies, is_repeated_guest, previous_cancellations, previous_bookings_not_canceled, booking_changes, days_in_waiting_list, adr, required_car_parking_spaces, total_of_special_requests
#Date type
#<date>: reservation_status_date
#Here we would like to pick out the customers who have spent at least 1 day on the waiting list (days_in_waiting_list > 0) and rank them in descending order, so that we know who needs to be served first
library(dplyr)
#Keep only the bookings that spent at least one day on the waiting list
waitingList <- hotels %>%
  filter(days_in_waiting_list > 0)
#Preview the waiting-time column for the first rows that matched
waitingList %>%
  select(days_in_waiting_list) %>%
  head()
# A tibble: 6 × 1
days_in_waiting_list
<dbl>
1 50
2 47
3 47
4 47
5 47
6 47
#Sort by waiting time, longest first, to surface the most urgent cases
waitingListDec <- waitingList %>%
  arrange(desc(days_in_waiting_list))
#Preview the waiting-time column of the longest-waiting rows
waitingListDec %>%
  select(days_in_waiting_list) %>%
  head()
# A tibble: 6 × 1
days_in_waiting_list
<dbl>
1 391
2 391
3 391
4 391
5 391
6 391
#Show all columns for the longest-waiting rows
waitingListDec %>%
  head()
# A tibble: 6 × 32
hotel is_canceled lead_time arrival_date_year arrival_date_mon…
<chr> <dbl> <dbl> <dbl> <chr>
1 City Hotel 1 443 2016 October
2 City Hotel 1 443 2016 October
3 City Hotel 1 443 2016 October
4 City Hotel 1 443 2016 October
5 City Hotel 1 443 2016 October
6 City Hotel 1 443 2016 October
# … with 27 more variables: arrival_date_week_number <dbl>,
# arrival_date_day_of_month <dbl>, stays_in_weekend_nights <dbl>,
# stays_in_week_nights <dbl>, adults <dbl>, children <dbl>,
# babies <dbl>, meal <chr>, country <chr>, market_segment <chr>,
# distribution_channel <chr>, is_repeated_guest <dbl>,
# previous_cancellations <dbl>,
# previous_bookings_not_canceled <dbl>, reserved_room_type <chr>, …
#Hence we know that the longest wait on the list is 391 days, and that several bookings share this maximum