library(tidyverse)
library(ggplot2)
library(dplyr)
library(DT)
library(shiny)
library(plotly)
library(dygraphs)
library(ggiraph)
library(tmaptools)
library(ggalluvial)
library(viridis)## [1] "C:/Users/Adinda Rizky Herawat/Dropbox/Fall Semester 2020_2021/3. Visualization in R/Project"
This dataset consist of 119,390 observations and 32 variables from kaggle (https://www.kaggle.com/jessemostipak/hotel-booking-demand). This dataset is about hotel booking information (city and resort hotel) such as room type, country, number of adults and children, number of staying and etc. Aim of this project, predicting average daily rates hotels based on some variables with the best regression methods.
Below is the explanation about our variables
length(unique(hotel$country))head(booking) %>%
knitr::kable(caption = "Booking Dataset") %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = FALSE)| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | NULL | NULL | 0 | Transient | 0 | 0 | 0 | Check-Out | 2015-07-01 |
| Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | NULL | NULL | 0 | Transient | 0 | 0 | 0 | Check-Out | 2015-07-01 |
| Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | NULL | NULL | 0 | Transient | 75 | 0 | 0 | Check-Out | 2015-07-02 |
| Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304 | NULL | 0 | Transient | 75 | 0 | 0 | Check-Out | 2015-07-02 |
| Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240 | NULL | 0 | Transient | 98 | 0 | 1 | Check-Out | 2015-07-03 |
| Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240 | NULL | 0 | Transient | 98 | 0 | 1 | Check-Out | 2015-07-03 |
## hotel is_canceled lead_time arrival_date_year
## Length:119390 Min. :0.0000 Min. : 0 Min. :2015
## Class :character 1st Qu.:0.0000 1st Qu.: 18 1st Qu.:2016
## Mode :character Median :0.0000 Median : 69 Median :2016
## Mean :0.3704 Mean :104 Mean :2016
## 3rd Qu.:1.0000 3rd Qu.:160 3rd Qu.:2017
## Max. :1.0000 Max. :737 Max. :2017
##
## arrival_date_month arrival_date_week_number arrival_date_day_of_month
## Length:119390 Min. : 1.00 Min. : 1.0
## Class :character 1st Qu.:16.00 1st Qu.: 8.0
## Mode :character Median :28.00 Median :16.0
## Mean :27.17 Mean :15.8
## 3rd Qu.:38.00 3rd Qu.:23.0
## Max. :53.00 Max. :31.0
##
## stays_in_weekend_nights stays_in_week_nights adults
## Min. : 0.0000 Min. : 0.0 Min. : 0.000
## 1st Qu.: 0.0000 1st Qu.: 1.0 1st Qu.: 2.000
## Median : 1.0000 Median : 2.0 Median : 2.000
## Mean : 0.9276 Mean : 2.5 Mean : 1.856
## 3rd Qu.: 2.0000 3rd Qu.: 3.0 3rd Qu.: 2.000
## Max. :19.0000 Max. :50.0 Max. :55.000
##
## children babies meal country
## Min. : 0.0000 Min. : 0.000000 Length:119390 Length:119390
## 1st Qu.: 0.0000 1st Qu.: 0.000000 Class :character Class :character
## Median : 0.0000 Median : 0.000000 Mode :character Mode :character
## Mean : 0.1039 Mean : 0.007949
## 3rd Qu.: 0.0000 3rd Qu.: 0.000000
## Max. :10.0000 Max. :10.000000
## NA's :4
## market_segment distribution_channel is_repeated_guest
## Length:119390 Length:119390 Min. :0.00000
## Class :character Class :character 1st Qu.:0.00000
## Mode :character Mode :character Median :0.00000
## Mean :0.03191
## 3rd Qu.:0.00000
## Max. :1.00000
##
## previous_cancellations previous_bookings_not_canceled reserved_room_type
## Min. : 0.00000 Min. : 0.0000 Length:119390
## 1st Qu.: 0.00000 1st Qu.: 0.0000 Class :character
## Median : 0.00000 Median : 0.0000 Mode :character
## Mean : 0.08712 Mean : 0.1371
## 3rd Qu.: 0.00000 3rd Qu.: 0.0000
## Max. :26.00000 Max. :72.0000
##
## assigned_room_type booking_changes deposit_type agent
## Length:119390 Min. : 0.0000 Length:119390 Length:119390
## Class :character 1st Qu.: 0.0000 Class :character Class :character
## Mode :character Median : 0.0000 Mode :character Mode :character
## Mean : 0.2211
## 3rd Qu.: 0.0000
## Max. :21.0000
##
## company days_in_waiting_list customer_type adr
## Length:119390 Min. : 0.000 Length:119390 Min. : -6.38
## Class :character 1st Qu.: 0.000 Class :character 1st Qu.: 69.29
## Mode :character Median : 0.000 Mode :character Median : 94.58
## Mean : 2.321 Mean : 101.83
## 3rd Qu.: 0.000 3rd Qu.: 126.00
## Max. :391.000 Max. :5400.00
##
## required_car_parking_spaces total_of_special_requests reservation_status
## Min. :0.00000 Min. :0.0000 Length:119390
## 1st Qu.:0.00000 1st Qu.:0.0000 Class :character
## Median :0.00000 Median :0.0000 Mode :character
## Mean :0.06252 Mean :0.5714
## 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :8.00000 Max. :5.0000
##
## reservation_status_date
## Length:119390
## Class :character
## Mode :character
##
##
##
##
## [1] 119390
## [1] 32
#convert variables to factor type:
booking$is_canceled <- as.factor(booking$is_canceled)
booking$customer_type <- as.factor(booking$customer_type)
booking$hotel <- as.factor(booking$hotel)
booking$meal <- factor(booking$meal, levels = c("BB","FB","HB","SC","Undefined"),
ordered = FALSE)
booking$arrival_date_month <- factor(booking$arrival_date_month,
levels=c("January","Februari", "March", "April","May","June",
"July","August","September",
"October","November","December"),
ordered = FALSE)
booking$distribution_channel <- factor(booking$distribution_channel, levels=
c("Direct","Corporate","TA/TO","Undefined", "GDS"),
ordered=FALSE)booking <- booking %>%
mutate(quarter = case_when (
arrival_date_week_number >= 1 & arrival_date_week_number<= 14 ~ 'Q1',
arrival_date_week_number > 14 & arrival_date_week_number<= 27 ~ 'Q2',
arrival_date_week_number > 27 & arrival_date_week_number<= 40 ~ 'Q3',
arrival_date_week_number > 40 & arrival_date_week_number<= 53 ~ 'Q4'))
booking$quarter <- factor(booking$quarter, levels=c("Q1","Q2","Q3","Q4"), ordered=TRUE)
booking$market_segment <- factor(booking$market_segment, levels = c(
"Direct","Corporate","Online TA","Offline
TA/TO","Complementary","Groups","Undefined","Aviation" ), ordered = FALSE)#Convert the location
booking <- booking %>%
mutate(country_name = case_when (
country == 'PRT' ~ 'Portugal',
country == 'GBR' ~ 'United Kingdom',
country == 'USA' ~ 'United States of America',
country == 'ESP' ~ 'Spain',
country == 'IRL' ~ 'Ireland',
country == 'FRA' ~ 'France',
country == 'ROU' ~ 'Romania',
country == 'NOR' ~ 'Norway',
country == 'OMN' ~ 'Oman',
country == 'ARG' ~ 'Argentina',
country == 'POL' ~ 'Poland',
country == 'DEU' ~ 'Germany',
country == 'BEL' ~ 'Belgium',
country == 'CHE' ~ 'Switzerland',
country == 'CN' ~ 'China',
country == 'GRC' ~ 'Greece',
country == 'ITA' ~ 'Italy',
country == 'NLD' ~ 'Netherlands',
country == 'DNK' ~ 'Denmark',
country == 'RUS' ~ 'Russian Federation',
country == 'SWE' ~ 'Sweden',
country == 'AUS' ~ 'Australia',
country == 'EST' ~ 'Estonia',
country == 'CZE' ~ 'Czech Republic',
country == 'BRA' ~ 'Brazil',
country == 'FIN' ~ 'Finland',
country == 'MOZ' ~ 'Mozambique',
country == 'BWA' ~ 'Botswana',
country == 'LUX' ~ 'Luxembourg',
country == 'SVN' ~ 'Slovenia',
country == 'ALB' ~ 'Albania',
country == 'IND' ~ 'India',
country == 'CHN' ~ 'China',
country == 'MEX' ~ 'Mexico',
country == 'MAR' ~ 'Morocco',
country == 'UKR' ~ 'Ukraine',
country == 'SMR' ~ 'San Marino',
country == 'LVA' ~ 'Latvia',
country == 'PRI' ~ 'Puerto Rico',
country == 'SRB' ~ 'Serbia',
country == 'CHL' ~ 'Chile',
country == 'AUT' ~ 'Austria',
country == 'BLR' ~ 'Belarus',
country == 'LTU' ~ 'Lithuania',
country == 'TUR' ~ 'Turkey',
country == 'ZAF' ~ 'South Africa',
country == 'AGO' ~ 'Angola',
country == 'ISR' ~ 'Israel',
country == 'CYM' ~ 'Cayman Islands',
country == 'ZMB' ~ 'Zambia',
country == 'CPV' ~ 'Cape Verde',
country == 'ZWE' ~ 'Zimbabwe',
country == 'DZA' ~ 'Algeria',
country == 'KOR' ~ 'South Korea',
country == 'CRI' ~ 'Costa Rica',
country == 'HUN' ~ 'Hungary',
country == 'ARE' ~ 'United Arab Emirates',
country == 'TUN' ~ 'Tunisia',
country == 'JAM' ~ 'Jamaica',
country == 'HRV' ~ 'Croatia',
country == 'HKG' ~ 'Hongkong',
country == 'IRN' ~ 'Iran',
country == 'GEO' ~ 'Georgia',
country == 'AND' ~ 'Andorra',
country == 'GIB' ~ 'Gibraltar',
country == 'URY' ~ 'Uruguay',
country == 'JEY' ~ 'Jersey',
country == 'CAF' ~ 'Central African Republic',
country == 'CYP' ~ 'Cyprus',
country == 'COL' ~ 'Colombia',
country == 'GGY' ~ 'Guernsey',
country == 'KWT' ~ 'Kuwait',
country == 'NGA' ~ 'Nigeria',
country == 'MDV' ~ 'Maldives',
country == 'VEN' ~ 'Venezuela',
country == 'SVK' ~ 'Slovakia',
country == 'FJI' ~ 'Fiji',
country == 'KAZ' ~ 'Kazakhstan',
country == 'PAK' ~ 'Pakistan',
country == 'IDN' ~ 'Indonesia',
country == 'LBN' ~ 'Lebanon',
country == 'PHL' ~ 'Philippines',
country == 'SEN' ~ 'Senegal',
country == 'SYC' ~ 'Seychelles',
country == 'AZE' ~ 'Azerbaijan',
country == 'BHR' ~ 'Bahrain',
country == 'NZL' ~ 'New Zealand',
country == 'THA' ~ 'Thailand',
country == 'DOM' ~ 'Dominican Republic',
country == 'MKD' ~ 'Republic of Macedonia',
country == 'MYS' ~ 'Malaysia',
country == 'ARM' ~ 'Armenia',
country == 'JPN' ~ 'Japan',
country == 'LKA' ~ 'Sri Lanka',
country == 'CUB' ~ 'Cuba',
country == 'CMR' ~ 'Cameroon',
country == 'BIH' ~ 'Bosnia and Herzegovina',
country == 'MUS' ~ 'Mauritius',
country == 'COM' ~ 'Comoros',
country =='SUR' ~ 'Suriname',
country == 'UGA' ~ 'Uganda',
country == 'BGR' ~ 'Bulgaria',
country == 'CIV' ~ 'Côte d Ivoire',
country == 'JOR' ~ 'Jordan',
country == 'SYR' ~ 'Syria',
country == 'SGP' ~ 'Singapore',
country == 'BDI' ~ 'Burundi',
country == 'SAU' ~ 'Saudi Arabia',
country == 'VNM' ~ 'Vietnam',
country == 'PLW' ~ 'Palau',
country == 'QAT' ~ 'Qatar',
country == 'EGY' ~ 'Egypt',
country == 'PER' ~ 'Peru',
country == 'MLT' ~ 'Malta',
country == 'MWI' ~ 'Malawi',
country == 'ECU' ~ 'Ecuador',
country == 'MDG' ~ 'Madagascar',
country == 'ISL' ~ 'Iceland',
country == 'UZB' ~ 'Uzbekistan',
country == 'NPL' ~ 'Nepal',
country == 'BHS' ~ 'Bahamas',
country == 'MAC' ~ 'Macao',
country == 'TGO' ~ 'Togo',
country == 'TWN' ~ 'Taiwan',
country == 'DJI' ~ 'Djibouti',
country == 'STP' ~ 'Sao Tome and Principe',
country == 'KNA' ~ 'Saint Kitts and Nevis',
country == 'ETH' ~ 'Ethiopia',
country == 'IRQ' ~ 'Iraq',
country == 'HND' ~ 'Honduras',
country == 'RWA' ~ 'Rwanda',
country == 'KHM' ~ 'Cambodia',
country == 'MCO' ~ 'Monaco',
country == 'BGD' ~ 'Bangladesh',
country == 'IMN' ~ 'Isle of Man',
country == 'TJK' ~ 'Tajikistan',
country == 'NIC' ~ 'Nicaragua',
country == 'BEN' ~ 'Benin',
country == 'VGB' ~ 'British Virgin Islands',
country == 'TZA' ~ 'Tanzania',
country == 'GAB' ~ 'Gabon',
country == 'GHA' ~ 'Ghana',
country == 'TMP' ~ 'East Timor',
country == 'GLP' ~ 'Guadeloupe',
country == 'KEN' ~ 'Kenya',
country == 'LIE' ~ 'Liechtenstein',
country == 'GNB' ~ 'Guinea-Bissau',
country == 'MNE' ~ 'Montenegro',
#country == 'UMI' ~ 'US Minor Outlying Islands',
country == 'MYT' ~ 'Mayotte',
country == 'FRO' ~ 'Faroe Islands',
country == 'MMR' ~ 'Myanmar',
country == 'PAN' ~ 'Panama',
country == 'BFA' ~ 'Burkina Faso',
country == 'LBY' ~ 'Libya',
country == 'MLI' ~ 'Mali',
country == 'NAM' ~ 'Namibia',
country == 'BOL' ~ 'Bolivia',
country == 'PRY' ~ 'Paraguay',
country == 'BRB' ~ 'Barbados',
country == 'ABW' ~ 'Aruba',
country == 'AIA' ~ 'Anguilla',
country == 'SLV' ~ 'El Salvador',
country == 'DMA' ~ 'Dominica',
country == 'PYF' ~ 'French Polynesia',
country == 'GUY' ~ 'Guyana',
country == 'LCA' ~ 'Saint Lucia',
country == 'ATA' ~ 'Antarctica',
country == 'GTM' ~ 'Guatemala',
country == 'ASM' ~ 'American Samoa',
country == 'MRT' ~ 'Mauritania',
country == 'NCL' ~ 'New Caledonia',
country == 'KIR' ~ 'Kiribati',
country == 'SDN' ~ 'Sudan',
country == 'ATF' ~ 'French Southern Territories',
country == 'SLE' ~ 'Sierra Leone',
country == 'LAO' ~ 'Lao PDR'))booking <- booking %>%
mutate(continent = case_when (
country == 'PRT' ~ 'Europe & UK',
country == 'GBR' ~ 'Europe & UK',
country == 'USA' ~ 'North America',
country == 'ESP' ~ 'Europe & UK',
country == 'IRL' ~ 'Europe & UK',
country == 'FRA' ~ 'Europe & UK',
country == 'ROU' ~ 'Europe & UK',
country == 'NOR' ~ 'Europe & UK',
country == 'OMN' ~ 'Asia',
country == 'ARG' ~ 'South America',
country == 'POL' ~ 'Europe & UK',
country == 'DEU' ~ 'Europe & UK',
country == 'BEL' ~ 'Europe & UK',
country == 'CHE' ~ 'Europe & UK',
country == 'CN' ~ 'Asia',
country == 'GRC' ~ 'Europe & UK',
country == 'ITA' ~ 'Europe & UK',
country == 'NLD' ~ 'Europe & UK',
country == 'DNK' ~ 'Europe & UK',
country == 'RUS' ~ 'Europe & UK',
country == 'SWE' ~ 'Europe & UK',
country == 'AUS' ~ 'Australia/Oceania',
country == 'EST' ~ 'Europe & UK',
country == 'CZE' ~ 'Europe & UK',
country == 'BRA' ~ 'South America',
country == 'FIN' ~ 'Europe & UK',
country == 'MOZ' ~ 'Africa',
country == 'BWA' ~ 'Africa',
country == 'LUX' ~ 'Europe & UK',
country == 'SVN' ~ 'Europe & UK',
country == 'ALB' ~ 'Europe & UK',
country == 'IND' ~ 'Asia',
country == 'CHN' ~ 'Asia',
country == 'MEX' ~ 'North America',
country == 'MAR' ~ 'Africa',
country == 'UKR' ~ 'Europe & UK',
country == 'SMR' ~ 'Europe & UK',
country == 'LVA' ~ 'Europe & UK',
country == 'PRI' ~ 'North America',
country == 'SRB' ~ 'Europe & UK',
country == 'CHL' ~ 'South America',
country == 'AUT' ~ 'Europe & UK',
country == 'BLR' ~ 'Europe & UK',
country == 'LTU' ~ 'Europe & UK',
country == 'TUR' ~ 'Europe & UK',
country == 'ZAF' ~ 'Africa',
country == 'AGO' ~ 'Africa',
country == 'ISR' ~ 'Asia',
country == 'CYM' ~ 'Europe & UK',
country == 'ZMB' ~ 'Africa',
country == 'CPV' ~ 'Africa',
country == 'ZWE' ~ 'Africa',
country == 'DZA' ~ 'Africa',
country == 'KOR' ~ 'Asia',
country == 'CRI' ~ 'North America',
country == 'HUN' ~ 'Europe & UK',
country == 'ARE' ~ 'Asia',
country == 'TUN' ~ 'Africa',
country == 'JAM' ~ 'North America',
country == 'HRV' ~ 'Europe & UK',
country == 'HKG' ~ 'Asia',
country == 'IRN' ~ 'Asia',
country == 'GEO' ~ 'Europe & UK',
country == 'AND' ~ 'Europe & UK',
country == 'GIB' ~ 'Europe & UK',
country == 'URY' ~ 'South America',
country == 'JEY' ~ 'Europe & UK',
country == 'CAF' ~ 'Africa',
country == 'CYP' ~ 'Europe & UK',
country == 'COL' ~ 'South America',
country == 'GGY' ~ 'Europe & UK',
country == 'KWT' ~ 'Asia',
country == 'NGA' ~ 'Africa',
country == 'MDV' ~ 'Asia',
country == 'VEN' ~ 'South America',
country == 'SVK' ~ 'Europe & UK',
country == 'FJI' ~ 'Australia/Oceania',
country == 'KAZ' ~ 'Asia',
country == 'PAK' ~ 'Asia',
country == 'IDN' ~ 'Asia',
country == 'LBN' ~ 'Asia',
country == 'PHL' ~ 'Asia',
country == 'SEN' ~ 'Africa',
country == 'SYC' ~ 'Africa',
country == 'AZE' ~ 'Europe & UK',
country == 'BHR' ~ 'Asia',
country == 'NZL' ~ 'Australia/Oceania',
country == 'THA' ~ 'Asia',
country == 'DOM' ~ 'North America',
country == 'MKD' ~ 'Europe & UK',
country == 'MYS' ~ 'Asia',
country == 'ARM' ~ 'Asia',
country == 'JPN' ~ 'Asia',
country == 'LKA' ~ 'Asia',
country == 'CUB' ~ 'North America',
country == 'CMR' ~ 'Africa',
country == 'BIH' ~ 'Europe & UK',
country == 'MUS' ~ 'Africa',
country == 'COM' ~ 'Africa',
country =='SUR' ~ 'South America',
country == 'UGA' ~ 'Africa',
country == 'BGR' ~ 'Europe & UK',
country == 'CIV' ~ 'Africa',
country == 'JOR' ~ 'Asia',
country == 'SYR' ~ 'Asia',
country == 'SGP' ~ 'Asia',
country == 'BDI' ~ 'Africa',
country == 'SAU' ~ 'Asia',
country == 'VNM' ~ 'Asia',
country == 'PLW' ~ 'Australia/Oceania',
country == 'QAT' ~ 'Asia',
country == 'EGY' ~ 'Africa',
country == 'PER' ~ 'South America',
country == 'MLT' ~ 'Europe & UK',
country == 'MWI' ~ 'Africa',
country == 'ECU' ~ 'South America',
country == 'MDG' ~ 'Africa',
country == 'ISL' ~ 'Europe & UK',
country == 'UZB' ~ 'Asia',
country == 'NPL' ~ 'Asia',
country == 'BHS' ~ 'North America',
country == 'MAC' ~ 'Asia',
country == 'TGO' ~ 'Africa',
country == 'TWN' ~ 'Asia',
country == 'DJI' ~ 'Africa',
country == 'STP' ~ 'Africa',
country == 'KNA' ~ 'North America',
country == 'ETH' ~ 'Africa',
country == 'IRQ' ~ 'Asia',
country == 'HND' ~ 'North America',
country == 'RWA' ~ 'Africa',
country == 'KHM' ~ 'Asia',
country == 'MCO' ~ 'Europe & UK',
country == 'BGD' ~ 'Asia',
country == 'IMN' ~ 'Europe & UK',
country == 'TJK' ~ 'Asia',
country == 'NIC' ~ 'North America',
country == 'BEN' ~ 'Africa',
country == 'VGB' ~ 'Europe & UK',
country == 'TZA' ~ 'Africa',
country == 'GAB' ~ 'Africa',
country == 'GHA' ~ 'Africa',
country == 'TMP' ~ 'Asia',
country == 'GLP' ~ 'Europe & UK',
country == 'KEN' ~ 'Africa',
country == 'LIE' ~ 'Europe & UK',
country == 'GNB' ~ 'Africa',
country == 'MNE' ~ 'Europe & UK',
country == 'UMI' ~ 'North America',
country == 'MYT' ~ 'Africa',
country == 'FRO' ~ 'Europe & UK',
country == 'MMR' ~ 'Asia',
country == 'PAN' ~ 'North America',
country == 'BFA' ~ 'Africa',
country == 'LBY' ~ 'North America',
country == 'MLI' ~ 'Africa',
country == 'NAM' ~ 'Africa',
country == 'BOL' ~ 'South America',
country == 'PRY' ~ 'South America',
country == 'BRB' ~ 'North America',
country == 'ABW' ~ 'South America',
country == 'AIA' ~ 'Europe & UK',
country == 'SLV' ~ 'North America',
country == 'DMA' ~ 'North America',
country == 'PYF' ~ 'Australia/Oceania',
country == 'GUY' ~ 'South America',
country == 'LCA' ~ 'North America',
country == 'ATA' ~ 'Antartica',
country == 'GTM' ~ 'North America',
country == 'ASM' ~ 'Australia/Oceania',
country == 'MRT' ~ 'Africa',
country == 'NCL' ~ 'Australia/Oceania',
country == 'KIR' ~ 'Australia/Oceania',
country == 'SDN' ~ 'Africa',
country == 'ATF' ~ 'Antartica',
country == 'SLE' ~ 'Africa',
country == 'LAO' ~ 'Asia'))
booking$continent <- factor(booking$continent, levels=c("Europe & UK","North America",
"Asia", 'South America',
'Australia/Oceania', 'Antartica','Africa'), ordered=FALSE)customer.type <- booking %>%
dplyr::select(is_canceled, customer_type, hotel )%>%
group_by(is_canceled, customer_type, hotel) %>%
summarise(customer.count = n())## `summarise()` regrouping output by 'is_canceled', 'customer_type' (override with `.groups` argument)
ggplot(data = customer.type,
aes( x = reorder(customer_type, customer.count),
y = customer.count,
fill = is_canceled)) +
geom_histogram(stat = "identity", position = position_dodge(width = 1)) +
facet_wrap(~hotel)+
labs(title = 'Cancellation by Customer Type',
subtitle = 'Based on number of customers',
caption = 'Data Source: Kaggle competition')+
xlab("Customer type") +
ylab("Number of Customers [in thousands]")+
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text=element_text(face = "bold"),
axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1),
panel.grid.minor = element_line(colour = "gray",linetype = "dotted"),
panel.background=element_rect(fill = "white")
) +
scale_y_continuous(breaks = seq(0,100000,by = 25000),
labels = format(seq(0,100000,by=25000)/1000),
limits = c(0, 60000))+
geom_text(stat = "identity",position = position_dodge(width = 1), hjust = -0.1, label.size = 0.05, angle = 90, aes(label = paste(round(customer.count/1000,1), "K")))+
scale_fill_discrete(
name = "Booking Status",
breaks = c("0", "1"),
labels = c("Cancelled", "Not Cancelled"))## Warning: Ignoring unknown parameters: binwidth, bins, pad
## Warning: Ignoring unknown parameters: label.size
revenue <- booking %>%
dplyr :: select(adr, hotel, arrival_date_month) %>%
group_by(arrival_date_month, hotel)%>%
summarise(adr = sum(adr))## `summarise()` regrouping output by 'arrival_date_month' (override with `.groups` argument)
## [1] "numeric"
#order month
order <- c ("January","February", "March", "April", "May", "June", "July", "August", "September" , "October", "November", "December" )
revenue$arrival_date_month <- factor(revenue$arrival_date_month,
levels = order)
ggplot(data = revenue,
aes( x = arrival_date_month,
y = adr,
linetype = hotel,
group = hotel,
color = hotel)) +
geom_line()+
geom_point()+
labs(title = 'Revenues by Month',
subtitle = 'Based on total of Average Daily Rate',
caption = 'Data Source: Kaggle competition')+
xlab("Month") +
ylab("Revenue [in thousands]")+
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text=element_text(face = "bold"),
axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1),
panel.grid.minor = element_line(colour = "gray",linetype = "dotted"),
panel.background=element_rect(fill = "white")
) +
scale_y_continuous(breaks = seq(0,1500000,by = 100000),
labels = format(seq(0,1500000,by = 100000)/1000))myplot <- ggplot(data = booking[booking$adr > 150 & booking$arrival_date_year == "2015" & booking$hotel == "Resort Hotel" & booking$customer_type == "Transient",],
aes( x = adr ,
y = lead_time,
color = factor(reserved_room_type)) ) +
geom_point()+
labs(title = 'Lead Time vs Average Daily Rate',
subtitle = 'Data in 2015, Resort Hotel',
caption = 'Data Source: Kaggle competition')+
xlab("Lead Time") +
ylab("Average Daily Rate (ADR)")+
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text=element_text(face = "bold"),
axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=1),
panel.grid.minor = element_line(colour = "gray",linetype = "dotted"),
panel.background=element_rect(fill = "white")
) +
scale_color_discrete(
name = "Reserved room type")
myplot_int <- myplot + geom_point_interactive(aes(tooltip = factor(market_segment),
data_id = factor(market_segment)), size = 2)
ggiraph(code = print(myplot_int), hover_css = "cursor:pointer;fill:yellow;stroke:red;", zoom_max = 5,
tooltip_offx = 20, tooltip_offy = -10)request <- booking %>%
dplyr :: select(total_of_special_requests, customer_type, hotel, arrival_date_month) %>%
group_by(customer_type, hotel, arrival_date_month) %>%
summarise(total_of_special_requests = sum(total_of_special_requests))## `summarise()` regrouping output by 'customer_type', 'hotel' (override with `.groups` argument)
## [1] "January" "Februari" "March" "April" "May" "June"
## [7] "July" "August" "September" "October" "November" "December"
request$arrival_date_month <- factor(request$arrival_date_month,
levels = c ("January","February", "March", "April", "May", "June", "July", "August", "September" , "October", "November", "December" ))
ggplot(data = request,
aes( x = arrival_date_month,
y = total_of_special_requests,
fill = customer_type)) +
geom_bar(stat = "identity") +
facet_wrap(~hotel)+
labs(title = 'Requests by Month',
subtitle = 'Based on total number of requests by customer type',
caption = 'Data Source: Kaggle competition')+
xlab("Month") +
ylab("Number of Request [in thousands]")+
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text=element_text(face = "bold"),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
panel.grid.minor = element_line(colour = "gray",linetype = "dotted"),
panel.background=element_rect(fill = "white")
) +
guides(fill = guide_legend(title = "Customer Type",override.aes = aes(label = "")))# where are the people coming from
booking_1 <- booking[booking$reservation_status == "Check-Out",]
# Subset the data to include the countries which has more than 1500 reservation request
# otherwise including all the country with few or occassional request to avoid the graph
# from being clumsy
sub_booking <- booking_1 %>%
group_by(country) %>%
filter(n() > 1500)
ggplot(sub_booking, aes(x = adr, fill = hotel, color = hotel)) +
geom_histogram(aes(y = ..density..), position = position_dodge(), binwidth = 20 ) +
geom_density(alpha = 0.1) +
labs(title = "Average Daily rate by Hotel",
caption = 'Data Source: Kaggle competition') +
scale_color_brewer(palette = "Paired") +
xlab("Hotel Price") +
ylab("Count")+
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text=element_text(face = "bold"),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
panel.grid.minor = element_line(colour = "gray",linetype = "dotted"),
panel.background=element_rect(fill = "white")) +
theme(legend.position = "top")dodge <- position_dodge(width = 0.4)
ggplot(data = sub_booking,
aes(x = hotel,
y = adr,
fill = factor(deposit_type))) +
geom_violin(position = dodge) +
geom_boxplot(position = dodge, width=0.1, color="black", alpha=0.2) +
#facet_wrap(~ deposit_type)+
labs(title = 'Average Daily Rate (ADR) by Hotel Type',
subtitle = 'By Deposit type',
caption = 'Data Source: Kaggle competition')+
xlab("Hotel Type") +
ylab("Average Daily Rate (ADR)")+
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text=element_text(face = "bold"),
axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=1),
panel.grid.minor = element_line(colour = "gray",linetype = "dotted"),
panel.background=element_rect(fill = "white")
)+
scale_y_continuous(limits = c(0, 500)) ## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
dodge <- position_dodge(width = 0.4)
ggplot(data = sub_booking,
aes(x = hotel,
y = adr,
fill = factor(total_of_special_requests))) +
geom_violin(position = dodge) +
geom_boxplot(position = dodge, width=0.1, color="black", alpha=0.2) +
# facet_wrap(~ customer_type)+
labs(title = 'Average Daily Rate by Hotel Type',
subtitle = 'By Customer Type',
caption = 'Data Source: Kaggle competition')+
xlab("Hotel Type") +
ylab("Average Daily Rate (ADR)")+
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text=element_text(face = "bold"),
axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=1),
panel.grid.minor = element_line(colour = "gray",linetype = "dotted"),
panel.background=element_rect(fill = "white")
)+
scale_y_continuous(limits = c(0, 500)) +
scale_fill_discrete(
name = "Number of requests",
breaks = c("0", "1", "2", "3", "4", "5"),
labels = c("No request", "1 request", "2 requests", "3 requests", "4 requests", "5 requests"))## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
dodge <- position_dodge(width = 0.4)
ggplot(data = sub_booking,
aes(x = factor(customer_type),
y = adr,
fill = factor(hotel))) +
geom_violin(position = dodge) +
geom_boxplot(position = dodge, width=0.1, color="black", alpha=0.2) +
# facet_wrap(~ customer_type)+
labs(title = 'Average Daily Rate by Hotel Type',
subtitle = 'By Customer Type',
caption = 'Data Source: Kaggle competition')+
xlab("Hotel Type") +
ylab("Average Daily Rate (ADR)")+
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text=element_text(face = "bold"),
axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=1),
panel.grid.minor = element_line(colour = "gray",linetype = "dotted"),
panel.background=element_rect(fill = "white")
)+
scale_y_continuous(limits = c(0, 500))## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
booking$right_room <- booking$reserved_room_type == booking$assigned_room_type
booking$right_room <- as.factor(booking$right_room)
right.room <- booking %>%
dplyr:: select(right_room, hotel) %>%
group_by(hotel,right_room) %>%
summarise(n = n()) %>%
mutate( prob = n/sum(n) * 100)## `summarise()` regrouping output by 'hotel' (override with `.groups` argument)
ggplot(data = right.room,
aes(x = hotel,
y = prob,
fill = right_room))+
geom_bar(stat = "identity", position = position_dodge(width = 1)) +
# facet_wrap(~ customer_type)+
labs(title = 'Do customer get the reserved room type ',
subtitle = 'By reservation status',
caption = 'Data Source: Kaggle competition')+
ylab("% Customers")+
theme(plot.title = element_text(hjust = 0.5, face = "bold"),
axis.text=element_text(face = "bold"),
axis.text.x = element_text(angle = 0, vjust = 0.5, hjust=1),
panel.grid.minor = element_line(colour = "gray",linetype = "dotted"),
panel.background=element_rect(fill = "white")
) +
geom_text(stat = "identity",position = position_dodge(width = 1),vjust = -0.5, label.size = 0.05, aes(label = paste(round(prob,2),"%"))) +
scale_y_continuous(limits = c(0, 100))## Warning: Ignoring unknown parameters: label.size
#hotel_locations <- booking %>%
# group_by(continent, country_name, hotel, adr) %>%
# arrange(desc(adr)) %>%
# summarise(n=n()) %>% head(100)
#hotel_locations$loc <- geocode_OSM(hotel_locations$country_name)
#hotel_locations$lat <- hotel_locations$loc$lat
#hotel_locations$lon <- hotel_locations$loc$lon
#leaflet(hotel_locations) %>%
# addTiles() %>%
# addCircleMarkers(~lon, ~lat, label=~as.character(country_name),
# weight = 3, fillOpacity = 0.5, radius=4)
# addProviderTiles(providers$Stamen.TonerLite) %>%
# addMiniMap(
# toggleDisplay = TRUE,
# tiles = providers$Stamen.TonerLite)#top50 <- hotel_locations %>%
# arrange(desc(adr))%>%
# head(50)
#city_hotel <- top50 %>% filter(hotel=="City Hotel")
#resort_hotel <- top50 %>% filter(hotel=="Resort Hotel")
#type_hotel <- rbind(city_hotel, resort_hotel)
#cof <- colorFactor(c("#ffa500", "#13ED3F"), domain=c("City Hotel", "Resort Hotel"))
#labels <- sprintf(
# "<strong>%s</strong><br/>%g USD",
# type_hotel$country_name, type_hotel$adr
#) %>% lapply(htmltools::HTML)
#leaflet(type_hotel) %>%
# addProviderTiles(providers$CartoDB.DarkMatter) %>%
# addCircleMarkers(~lon, ~lat, weight = 3, radius=4,
# color=~cof(hotel), stroke = F, fillOpacity = 0.5,
# label = labels) %>%
# addLegend("bottomright", colors= c("#ffa500", "#13ED3F"),
# labels=c("City Hotel", "Resort Hotel"), title="Type of Hotel") booking_distribution_channel <- booking %>%
group_by(arrival_date_week_number, distribution_channel,
continent) %>%
drop_na() %>%
summarise(n=n())## `summarise()` regrouping output by 'arrival_date_week_number', 'distribution_channel' (override with `.groups` argument)
ggplot(data=booking_distribution_channel, aes(x=arrival_date_week_number, y= n,
alluvium = distribution_channel)) +
geom_alluvium(aes(fill=distribution_channel),
alpha=.75, decreasing=FALSE) +
scale_x_continuous(breaks=seq(1,52,13)) +
theme_bw() +
scale_fill_brewer(type = "qual", palette = "Dark2") +
scale_color_brewer(type = "qual", palette = "Dark2") +
facet_wrap(~ continent, scales = "fixed") +
ggtitle("Distribution Channel In each Continent") +
xlab("Arrival Date Week Number")+
ylab("Number of Distribution Channel") +
labs(fill = "Distribution Channel") +
theme(plot.title = element_text(hjust=0.5, face="bold"),
axis.title.x = element_text(hjust=0.5, face="bold"),
axis.title.y = element_text(hjust=0.5, face="bold"))booking_month <- booking %>%
select(hotel, arrival_date_month, lead_time,
arrival_date_year,continent) %>%
drop_na(continent, arrival_date_month) %>%
group_by(arrival_date_month, hotel, continent) %>%
summarise(total_lead_time=mean(lead_time))## `summarise()` regrouping output by 'arrival_date_month', 'hotel' (override with `.groups` argument)
ggplot(booking_month, aes(fill=continent, y=total_lead_time, x=arrival_date_month)) +
geom_bar(position = "stack", stat="identity") +
scale_fill_viridis(discrete = T) +
ggtitle("Number of Lead Time to Book the Hotel Based on Continent") +
xlab("Month") +
ylab("Lead Time") +
labs(fill="Continent") +
theme(plot.title = element_text(hjust = 0.5, face="bold", size=12),
axis.text.x = element_text(angle = 40, hjust = 1),
axis.text.y = element_text(hjust = 0.5),
panel.background = element_rect(fill = "white", colour ="white",linetype = "solid"),
panel.grid.minor = element_line(colour="#A9A9A9", size=0.5))+
#geom_text(aes(label = total_lead_time), hjust = 0.5, color = 'white', size = 3) +
coord_flip() total_lead_time <- booking %>%
select(arrival_date_month, lead_time) %>%
group_by(arrival_date_month) %>%
drop_na(arrival_date_month)
ggplot(total_lead_time, aes(x=arrival_date_month, y=lead_time,
fill=as.factor(arrival_date_month))) +
geom_boxplot() +
# scale_fill_viridis(discrete = TRUE, alpha=0.6) +
ggtitle("Boxplot of Lead Time") +
ylab("Total Number of Visitors") +
xlab("") +
theme(legend.position = "none",
plot.title = element_text(face="bold", hjust=.5),
axis.text.x = element_text(size = 12, angle = 90,
hjust = .5, vjust = .5, face = "bold"),
axis.text.y = element_text(size = 12, angle = 0,
hjust = 1, vjust = 0, face = "bold"),
axis.title.x = element_text(size = 12, angle = 0,
hjust = .5, vjust = 0, face = "bold"),
axis.title.y = element_text(size = 12, angle = 90,
hjust = .5, vjust = .5, face = "bold"),
axis.ticks = element_line(colour = "#808080", size = 2),
axis.line = element_line(colour = "#808080", linetype ="solid",size = 1),
panel.grid.major = element_line(colour = "#808080"),
panel.grid.major.x = element_line(size=1, colour="#808080"),
panel.grid.minor.y = element_line(size = 1, colour = "#808080"),
panel.background = element_rect(fill = "white", colour = "white"))+
coord_flip()booking_quarter <- booking %>%
select(continent,quarter, adults, children, babies) %>%
mutate(total_visitors=adults+children,babies) %>%
group_by(continent, quarter) %>%
drop_na(continent) %>%
summarise(total = sum(total_visitors)) ## `summarise()` regrouping output by 'continent' (override with `.groups` argument)
booking_quarter$tooltips <- paste(
"Continent :", booking_quarter$continent,
"\nNumofPpl :", booking_quarter$total,
"\nQuarter : ", booking_quarter$quarter,
sep="") %>%
sapply(htmltools::HTML)
plot0 <-ggplot(booking_quarter, aes(fill=continent,
y=total, x=quarter, text=tooltips))+
geom_area(aes(fill=continent, group=continent)) +
scale_fill_viridis(discrete = TRUE) +
ggtitle("Number of people staying in Hotel by Quarter") +
ylab("Number of People") +
xlab("Quarter") +
labs(fill="Continent") +
theme(plot.title = element_text(hjust=0.5, face="bold"),
panel.background = element_rect(fill = "white", colour ="white",linetype = "solid"),
panel.grid.minor = element_line(colour="#A9A9A9", size=0.5))
plot1 <- ggplotly(plot0, tooltip="tooltips")## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
booking_market <- booking %>%
select(market_segment,adults, children, babies) %>%
mutate(total_visitors=adults+children,babies) %>%
group_by(market_segment) %>%
summarise(total = sum(total_visitors)) %>%
drop_na(market_segment, total) %>%
mutate(prop=round(total/sum(total)*100,2)) %>%
mutate(ypos=cumsum(prop)-0.5*prop)## `summarise()` ungrouping output (override with `.groups` argument)
booking_market$pos = (cumsum(c(0, booking_market$prop)) + c(booking_market$ypos/ 2, .01))[1:nrow(booking_market)]
ggplot(booking_market, aes(x="",y=prop, fill=as.factor(market_segment))) +
geom_bar(stat="identity", width = 1, color="white") +
coord_polar("y", start=0) +
theme_void() +
labs(fill="Market Segment") +
geom_text(aes(x=1.6, label=paste(prop, "%")),
position = position_stack(vjust = 0.5)) +
scale_fill_brewer(palette="Set1") +
ggtitle('Market Segment for Hotel') +
theme(plot.title = element_text(hjust = 0.5, face="bold", size=16)) booking_europe <- booking %>%
filter(continent == "Europe & UK") %>%
mutate(total_visitors=adults+children,babies,
total_days = stays_in_week_nights + stays_in_weekend_nights) %>%
filter(adr<=1000 & adr>=0) %>%
mutate(adr_price = adr*100000)
booking_europe %>%
arrange(desc(adr_price)) %>%
ggplot(aes(x=total_visitors, y=total_days, size=adr_price, fill=as.factor(continent))) +
geom_point(alpha=0.5, shape=21, color="black") +
scale_size(range = c(.1, 24), name="Total Visitors") +
theme(legend.position="none") +
ylab("Averate Daily Rate") +
xlab("Lead Time") +
ggtitle('Correlation between Average Daily Rate vs Lead Time') +
theme(plot.title = element_text(hjust=0.5),
axis.ticks = element_line(colour = "#808080", size = 2),
axis.line = element_line(colour = "#808080", linetype ="solid",size = 1),
panel.grid.major = element_line(colour = "#808080"),
panel.grid.major.x = element_line(size=1, colour="#808080"),
panel.grid.minor.y = element_line(size = 1, colour = "#808080"),
panel.background = element_rect(fill = "white", colour = "white"))## Warning: Removed 4 rows containing missing values (geom_point).
This plot shows comparison average daily rate with meal from 2015-2017 . From bed breakfast, full board, half board, self catering and undefined.
First we would like to talks about the full board, Full board means hotel provides bed, breakfast, packed
lunch and evening meal. Full board showed with the black bards, Full board has the highest demand in 2017, the average daily rate of the hotel which provides of full board are the cheapest compare the other type of the meals.
Then we would like to talk about bed breakfast, bed breakfast means people who stay in private family homes with the host living there. It showed with blue bars However, the number of Bed breakfast are less than Full Board. The average daily price of the hotel that provides this type of breakfast is the most expensive.
From this plot, It means that people prefer to have full board because it’s more comfortable for them also it has the lowest average daily rate in the hotel
num_18 <- booking %>%
select(continent, meal, arrival_date_year, adr) %>%
group_by(arrival_date_year, meal) %>%
filter(adr<=1000 & adr>=0) %>%
drop_na(continent) %>%
summarise(n=n(),
price= mean(adr*1000))## `summarise()` regrouping output by 'arrival_date_year' (override with `.groups` argument)
num_18$meal <- ifelse(num_18$meal=="BB","Bed_Breakfast",
ifelse(num_18$meal=="SC","Self_Catering",ifelse(
num_18$meal=="HB","Half_Board",
ifelse(num_18$meal=="FB","Full_Board","Undefined"))))
ggplot(num_18, aes(x=arrival_date_year, y=price, fill=meal, color=meal)) +
geom_bar(aes(x=arrival_date_year, y=price),stat="identity",position="dodge") +
geom_line(aes(x=arrival_date_year, y=2.5*n), linetype="solid", size=2) +
xlab("Arrival Date Year") +
ylab("Price") +
labs(fill="Meal", color="Avg Daily Rate", subtitle ="2015-2017") +
ggtitle("Average daily rate in Meal") +
scale_x_continuous(breaks = c(2015,2016,2017)) +
scale_fill_manual(values = c("Bed_Breakfast" = "darkblue", "Full_Board" = "black",
"Half_Board" = "darkgreen", "Self_Catering" = "maroon", "Undefined"="gray"))+
theme(legend.position = "right",
plot.title = element_text(hjust=0.5, face="bold"),
plot.subtitle = element_text(hjust=0.5, face="bold"),
panel.grid.major = element_line(colour = "#808080"),
panel.grid.major.x = element_line(size=1, colour="#808080"),
panel.grid.minor.y = element_line(size = 1, colour = "#808080"),
panel.background = element_rect(fill = "white", colour = "white"))coeff <- 10
data_adult_family <- booking %>%
mutate(family= adults+children+babies) %>%
group_by(arrival_date_week_number) %>%
summarise(family_num = sum(family),
adult_num = sum(adults)) %>%
drop_na(arrival_date_week_number)## `summarise()` ungrouping output (override with `.groups` argument)
ggplot(data_adult_family, aes(x=arrival_date_week_number, fill=adult_num)) +
geom_line(aes(y=family_num), size=2, color="#FC4E07") +
geom_bar( aes(y=adult_num), stat="identity") +
scale_y_continuous(
name = "Total Number of Families",
sec.axis = sec_axis(~.*coeff, name="Total Number of Adults")) +
labs(fill="Number of Adults") +
theme(legend.position="right",
axis.title.y = element_text(size=13,hjust=0.5),
axis.title.y.right = element_text(size=13, hjust=.5),
plot.title = element_text(hjust=0.5),
axis.text.x = element_text(size = 12, angle = 90,
hjust = .5, vjust = .5, face = "bold"),
axis.title.x = element_text(size = 12, angle = 0,
hjust = .5, vjust = 0, face = "bold"),
axis.ticks = element_line(colour = "#808080", size = 2),
axis.line = element_line(colour = "#808080", linetype ="solid",size = 1),
panel.grid.major = element_line(colour = "#808080"),
panel.grid.major.x = element_line(size=1, colour="#808080"),
panel.grid.minor.y = element_line(size = 1, colour = "#808080"),
panel.background = element_rect(fill = "white", colour = "white")) +
ggtitle("Number of Adults Vs Family") +
xlab("Week Number")