The dataset that is used in this analysis is taken from http://insideairbnb.com/get-the-data/ from “Singapore” section

Library

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(readr)
library(skimr)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(dplyr)
library(ggplot2)
library(stringr)

Import Data

#Import the airbnb singapore dataset into RStudio
airbnb_sg <- read_csv("D:/DATA SCIENCE/Data Science Project/Dataset/airbnb_singapore.csv")
## Rows: 3443 Columns: 75
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (31): listing_url, last_scraped, source, name, description, neighborhood...
## dbl (37): id, scrape_id, host_id, host_listings_count, host_total_listings_c...
## lgl  (7): host_is_superhost, host_has_profile_pic, host_identity_verified, b...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Import the review for airbnb singapore dataset into RStudio
review_listing <- read_csv("D:/DATA SCIENCE/Data Science Project/R AirBnb Project/reviews.csv")
## Rows: 34825 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, reviewer_name, comments
## dbl (3): listing_id, id, reviewer_id
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Checking The Dataset Structure

#View the structure of airbnb dataset
str(airbnb_sg)
## spec_tbl_df [3,443 × 75] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id                                          : num [1:3443] 71609 71896 71903 275343 275344 ...
##  $ listing_url                                 : chr [1:3443] "https://www.airbnb.com/rooms/71609" "https://www.airbnb.com/rooms/71896" "https://www.airbnb.com/rooms/71903" "https://www.airbnb.com/rooms/275343" ...
##  $ scrape_id                                   : num [1:3443] 2.02e+13 2.02e+13 2.02e+13 2.02e+13 2.02e+13 ...
##  $ last_scraped                                : chr [1:3443] "6/29/2023" "6/28/2023" "6/29/2023" "6/28/2023" ...
##  $ source                                      : chr [1:3443] "city scrape" "city scrape" "city scrape" "city scrape" ...
##  $ name                                        : chr [1:3443] "Villa in Singapore · ★4.44 · 2 bedrooms · 3 beds · 1 private bath" "Home in Singapore · ★4.16 · 1 bedroom · 1 bed · Shared half-bath" "Home in Singapore · ★4.41 · 1 bedroom · 2 beds · Shared half-bath" "Rental unit in Singapore · ★4.40 · 1 bedroom · 1 bed · 2 shared baths" ...
##  $ description                                 : chr [1:3443] "For 3 rooms.Book room 1&2 and room 4<br /><br /><b>The space</b><br />Landed Homestay Room for Rental. Between "| __truncated__ "<b>The space</b><br />Vocational Stay Deluxe Bedroom in Singapore.(Near Airport) <br /> <br />Located Between  "| __truncated__ "Like your own home, 24hrs access.<br /><br /><b>The space</b><br />Vocational Stay Deluxe Bedroom in Singapore."| __truncated__ "**IMPORTANT NOTES:  READ BEFORE YOU BOOK! <br />==Since this is an HDB Flat tourists are NOT ALLOWED unless hav"| __truncated__ ...
##  $ neighborhood_overview                       : chr [1:3443] NA NA "Quiet and view of the playground with exercise tracks with access to neighbourhood Simwi Estate." NA ...
##  $ picture_url                                 : chr [1:3443] "https://a0.muscache.com/pictures/24453191/35803acb_original.jpg" "https://a0.muscache.com/pictures/2440674/ac4f4442_original.jpg" "https://a0.muscache.com/pictures/568743/7bc623e9_original.jpg" "https://a0.muscache.com/pictures/miso/Hosting-275343/original/0239b8f0-a116-4118-889c-41f23df6303d.jpeg" ...
##  $ host_id                                     : num [1:3443] 367042 367042 367042 1439258 1439258 ...
##  $ host_url                                    : chr [1:3443] "https://www.airbnb.com/users/show/367042" "https://www.airbnb.com/users/show/367042" "https://www.airbnb.com/users/show/367042" "https://www.airbnb.com/users/show/1439258" ...
##  $ host_name                                   : chr [1:3443] "Belinda" "Belinda" "Belinda" "Kay" ...
##  $ host_since                                  : chr [1:3443] "1/29/2011" "1/29/2011" "1/29/2011" "11/24/2011" ...
##  $ host_location                               : chr [1:3443] "Singapore" "Singapore" "Singapore" "Singapore" ...
##  $ host_about                                  : chr [1:3443] "Hi My name is Belinda -Housekeeper \n\nI would like to welcome you to my \"Homestay Website\" \n\n\nAccomodatio"| __truncated__ "Hi My name is Belinda -Housekeeper \n\nI would like to welcome you to my \"Homestay Website\" \n\n\nAccomodatio"| __truncated__ "Hi My name is Belinda -Housekeeper \n\nI would like to welcome you to my \"Homestay Website\" \n\n\nAccomodatio"| __truncated__ "K2 Guesthouse is designed for guests who want a truly local experience with local people. Experience eating loc"| __truncated__ ...
##  $ host_response_time                          : chr [1:3443] "within an hour" "within an hour" "within an hour" "N/A" ...
##  $ host_response_rate                          : chr [1:3443] "100%" "100%" "100%" "N/A" ...
##  $ host_acceptance_rate                        : chr [1:3443] "100%" "100%" "100%" "N/A" ...
##  $ host_is_superhost                           : logi [1:3443] NA NA NA NA NA NA ...
##  $ host_thumbnail_url                          : chr [1:3443] "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_small" "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_small" "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_small" "https://a0.muscache.com/im/pictures/user/7245b0a9-27fa-4759-9fb3-59ae8299e8a3.jpg?aki_policy=profile_small" ...
##  $ host_picture_url                            : chr [1:3443] "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_x_medium" "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_x_medium" "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_x_medium" "https://a0.muscache.com/im/pictures/user/7245b0a9-27fa-4759-9fb3-59ae8299e8a3.jpg?aki_policy=profile_x_medium" ...
##  $ host_neighbourhood                          : chr [1:3443] "Tampines" "Tampines" "Tampines" "Bukit Merah" ...
##  $ host_listings_count                         : num [1:3443] 5 5 5 51 51 5 7 51 51 7 ...
##  $ host_total_listings_count                   : num [1:3443] 15 15 15 56 56 15 8 56 56 8 ...
##  $ host_verifications                          : chr [1:3443] "['email', 'phone']" "['email', 'phone']" "['email', 'phone']" "['email', 'phone']" ...
##  $ host_has_profile_pic                        : logi [1:3443] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ host_identity_verified                      : logi [1:3443] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ neighbourhood                               : chr [1:3443] NA NA "Singapore, Singapore" NA ...
##  $ neighbourhood_cleansed                      : chr [1:3443] "Tampines" "Tampines" "Tampines" "Bukit Merah" ...
##  $ neighbourhood_group_cleansed                : chr [1:3443] "East Region" "East Region" "East Region" "Central Region" ...
##  $ latitude                                    : num [1:3443] 1.35 1.35 1.35 1.29 1.29 ...
##  $ longitude                                   : num [1:3443] 104 104 104 104 104 ...
##  $ property_type                               : chr [1:3443] "Private room in villa" "Private room in home" "Private room in home" "Private room in rental unit" ...
##  $ room_type                                   : chr [1:3443] "Private room" "Private room" "Private room" "Private room" ...
##  $ accommodates                                : num [1:3443] 3 1 2 1 1 4 2 1 1 2 ...
##  $ bathrooms                                   : logi [1:3443] NA NA NA NA NA NA ...
##  $ bathrooms_text                              : chr [1:3443] "1 private bath" "Shared half-bath" "Shared half-bath" "2 shared baths" ...
##  $ bedrooms                                    : num [1:3443] NA NA NA NA NA 3 NA NA NA NA ...
##  $ beds                                        : num [1:3443] 3 1 2 1 1 5 1 1 1 1 ...
##  $ amenities                                   : chr [1:3443] "[\"Kitchen\", \"Keypad\", \"Shared hot tub\", \"Children\\u2019s books and toys\", \"Wifi\", \"Essentials\", \""| __truncated__ "[\"Shower gel\", \"Kitchen\", \"Cleaning products\", \"Keypad\", \"Drying rack for clothing\", \"Shared hot tub"| __truncated__ "[\"Kitchen\", \"Keypad\", \"Shared hot tub\", \"Wifi\", \"Essentials\", \"Dryer\", \"Heating\", \"Hot water\", "| __truncated__ "[\"Kitchen\", \"Keypad\", \"Bed linens\", \"Carbon monoxide alarm\", \"Wifi\", \"Hot water kettle\", \"Air cond"| __truncated__ ...
##  $ price                                       : chr [1:3443] "$139.00" "$70.00" "$81.00" "$55.00" ...
##  $ minimum_nights                              : num [1:3443] 92 92 92 90 60 92 92 60 60 92 ...
##  $ maximum_nights                              : num [1:3443] 1125 1125 1125 999 999 ...
##  $ minimum_minimum_nights                      : num [1:3443] 92 92 92 90 60 92 92 60 60 92 ...
##  $ maximum_minimum_nights                      : num [1:3443] 92 92 92 90 60 92 92 60 60 92 ...
##  $ minimum_maximum_nights                      : num [1:3443] 1125 1125 1125 999 999 ...
##  $ maximum_maximum_nights                      : num [1:3443] 1125 1125 1125 999 999 ...
##  $ minimum_nights_avg_ntm                      : num [1:3443] 92 92 92 90 60 92 92 60 60 92 ...
##  $ maximum_nights_avg_ntm                      : num [1:3443] 1125 1125 1125 999 999 ...
##  $ calendar_updated                            : logi [1:3443] NA NA NA NA NA NA ...
##  $ has_availability                            : logi [1:3443] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ availability_30                             : num [1:3443] 28 30 28 0 0 28 30 27 0 30 ...
##  $ availability_60                             : num [1:3443] 58 60 58 0 0 58 60 27 0 60 ...
##  $ availability_90                             : num [1:3443] 88 90 88 26 0 88 90 27 24 90 ...
##  $ availability_365                            : num [1:3443] 363 365 363 301 57 363 365 235 299 365 ...
##  $ calendar_last_scraped                       : chr [1:3443] "6/29/2023" "6/28/2023" "6/29/2023" "6/28/2023" ...
##  $ number_of_reviews                           : num [1:3443] 20 24 47 22 15 12 133 17 5 81 ...
##  $ number_of_reviews_ltm                       : num [1:3443] 0 0 0 1 2 0 0 1 2 0 ...
##  $ number_of_reviews_l30d                      : num [1:3443] 0 0 0 0 0 0 0 0 1 0 ...
##  $ first_review                                : chr [1:3443] "12/19/2011" "7/30/2011" "5/4/2011" "4/20/2013" ...
##  $ last_review                                 : chr [1:3443] "1/17/2020" "10/13/2019" "1/9/2020" "8/13/2022" ...
##  $ review_scores_rating                        : num [1:3443] 4.44 4.16 4.41 4.4 4.54 4.83 4.43 3.62 4.5 4.43 ...
##  $ review_scores_accuracy                      : num [1:3443] 4.37 4.22 4.39 4.16 4.64 4.67 4.33 3.63 4.5 4.45 ...
##  $ review_scores_cleanliness                   : num [1:3443] 4 4.09 4.52 4.26 4.21 4.75 4.16 4 4.25 4.41 ...
##  $ review_scores_checkin                       : num [1:3443] 4.63 4.43 4.63 4.47 4.64 4.58 4.5 4.5 4.75 4.71 ...
##  $ review_scores_communication                 : num [1:3443] 4.78 4.43 4.64 4.42 4.57 4.67 4.66 4.06 4.75 4.76 ...
##  $ review_scores_location                      : num [1:3443] 4.26 4.17 4.5 4.53 4.64 4.33 4.52 4 4.25 4.64 ...
##  $ review_scores_value                         : num [1:3443] 4.32 4.04 4.36 4.63 4.43 4.45 4.39 3.88 4.5 4.55 ...
##  $ license                                     : chr [1:3443] NA NA NA NA ...
##  $ instant_bookable                            : logi [1:3443] FALSE FALSE FALSE TRUE TRUE TRUE ...
##  $ calculated_host_listings_count              : num [1:3443] 5 5 5 51 51 5 7 51 51 7 ...
##  $ calculated_host_listings_count_entire_homes : num [1:3443] 0 0 0 2 2 0 1 2 2 1 ...
##  $ calculated_host_listings_count_private_rooms: num [1:3443] 5 5 5 49 49 5 6 49 49 6 ...
##  $ calculated_host_listings_count_shared_rooms : num [1:3443] 0 0 0 0 0 0 0 0 0 0 ...
##  $ reviews_per_month                           : num [1:3443] 0.14 0.17 0.32 0.18 0.11 0.09 0.96 0.13 0.04 0.68 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_double(),
##   ..   listing_url = col_character(),
##   ..   scrape_id = col_double(),
##   ..   last_scraped = col_character(),
##   ..   source = col_character(),
##   ..   name = col_character(),
##   ..   description = col_character(),
##   ..   neighborhood_overview = col_character(),
##   ..   picture_url = col_character(),
##   ..   host_id = col_double(),
##   ..   host_url = col_character(),
##   ..   host_name = col_character(),
##   ..   host_since = col_character(),
##   ..   host_location = col_character(),
##   ..   host_about = col_character(),
##   ..   host_response_time = col_character(),
##   ..   host_response_rate = col_character(),
##   ..   host_acceptance_rate = col_character(),
##   ..   host_is_superhost = col_logical(),
##   ..   host_thumbnail_url = col_character(),
##   ..   host_picture_url = col_character(),
##   ..   host_neighbourhood = col_character(),
##   ..   host_listings_count = col_double(),
##   ..   host_total_listings_count = col_double(),
##   ..   host_verifications = col_character(),
##   ..   host_has_profile_pic = col_logical(),
##   ..   host_identity_verified = col_logical(),
##   ..   neighbourhood = col_character(),
##   ..   neighbourhood_cleansed = col_character(),
##   ..   neighbourhood_group_cleansed = col_character(),
##   ..   latitude = col_double(),
##   ..   longitude = col_double(),
##   ..   property_type = col_character(),
##   ..   room_type = col_character(),
##   ..   accommodates = col_double(),
##   ..   bathrooms = col_logical(),
##   ..   bathrooms_text = col_character(),
##   ..   bedrooms = col_double(),
##   ..   beds = col_double(),
##   ..   amenities = col_character(),
##   ..   price = col_character(),
##   ..   minimum_nights = col_double(),
##   ..   maximum_nights = col_double(),
##   ..   minimum_minimum_nights = col_double(),
##   ..   maximum_minimum_nights = col_double(),
##   ..   minimum_maximum_nights = col_double(),
##   ..   maximum_maximum_nights = col_double(),
##   ..   minimum_nights_avg_ntm = col_double(),
##   ..   maximum_nights_avg_ntm = col_double(),
##   ..   calendar_updated = col_logical(),
##   ..   has_availability = col_logical(),
##   ..   availability_30 = col_double(),
##   ..   availability_60 = col_double(),
##   ..   availability_90 = col_double(),
##   ..   availability_365 = col_double(),
##   ..   calendar_last_scraped = col_character(),
##   ..   number_of_reviews = col_double(),
##   ..   number_of_reviews_ltm = col_double(),
##   ..   number_of_reviews_l30d = col_double(),
##   ..   first_review = col_character(),
##   ..   last_review = col_character(),
##   ..   review_scores_rating = col_double(),
##   ..   review_scores_accuracy = col_double(),
##   ..   review_scores_cleanliness = col_double(),
##   ..   review_scores_checkin = col_double(),
##   ..   review_scores_communication = col_double(),
##   ..   review_scores_location = col_double(),
##   ..   review_scores_value = col_double(),
##   ..   license = col_character(),
##   ..   instant_bookable = col_logical(),
##   ..   calculated_host_listings_count = col_double(),
##   ..   calculated_host_listings_count_entire_homes = col_double(),
##   ..   calculated_host_listings_count_private_rooms = col_double(),
##   ..   calculated_host_listings_count_shared_rooms = col_double(),
##   ..   reviews_per_month = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
#View the structure of review dataset
str(review_listing)
## spec_tbl_df [34,825 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ listing_id   : num [1:34825] 71609 71609 71609 71609 71609 ...
##  $ id           : num [1:34825] 793880 1731810 2162194 2190615 3221837 ...
##  $ date         : chr [1:34825] "12/19/2011" "7/17/2012" "9/1/2012" "9/4/2012" ...
##  $ reviewer_id  : num [1:34825] 1456140 1804182 3113461 1432123 2759938 ...
##  $ reviewer_name: chr [1:34825] "Max" "Zac" "Zahra" "Helmut" ...
##  $ comments     : chr [1:34825] "The rooms were clean and tidy. Beds very comfortable.\r<br/>Thank you Belinda." "Good space and quite an interesting home in a quiet location - a lovely park across the road which is rare in S"| __truncated__ "It was a comfortable place. Belinda was a kind and helpful woman who helped us have a nice accommodation there "| __truncated__ "We are four mature age travellers and stayed for one night. We had a wonderfully warm reception on arrival and "| __truncated__ ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   listing_id = col_double(),
##   ..   id = col_double(),
##   ..   date = col_character(),
##   ..   reviewer_id = col_double(),
##   ..   reviewer_name = col_character(),
##   ..   comments = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

Selecting the column that will be used in the analysis

new_airbnb_sg <- airbnb_sg %>% 
  select(
    "id",
    "listing_url",
    "name",
    "description",
    "neighborhood_overview",
    "host_id",
    "host_url",
    "host_name",
    "host_since",
    "host_location",
    "host_response_time",
    "host_response_rate",
    "host_is_superhost",
    "host_identity_verified",
    "property_type",
    "room_type",
    "accommodates",
    "bathrooms_text",
    "bedrooms",
    "beds",
    "price",
    "availability_30",
    "availability_60",
    "availability_90",
    "availability_365",
    "review_scores_cleanliness",
    "review_scores_location"
  )

Cleaning the data by removing certain rows that have NA and converting column Price into Numeric for further analysis

new_clean_airbnb_sg <- 
  #converting price column from character to numeric
  mutate(new_airbnb_sg,new_price=parse_number(new_airbnb_sg$price)) %>% 
  filter(host_identity_verified !=FALSE) %>% 
  drop_na(review_scores_cleanliness) %>% 
  clean_names()
#Viewing the clean dataset
View(new_clean_airbnb_sg)

In order to calculate revenue, I created list of new columns : -Revenue_30 -Revenue_60 -Revenue_90 -Revenue_365

The purpose of these new columns was to calculate revenue of airbnb operators.The assumption for the revenue was the price multiply by the non available nights.

airbnb_sg_revenue <- 
  mutate(new_clean_airbnb_sg,revenue_30=new_price*(30-availability_30),
         revenue_60=new_price * (60-availability_60),
         revenue_90=new_price * (90-availability_90),
         revenue_1year = new_price * (365-availability_365)
         ) %>% 
  select(-price)

view(airbnb_sg_revenue)

The Analysis Phase #1.Top 10 airbnb by revenue per month

airbnb_top10_month <- airbnb_sg_revenue %>% 
  select(id,listing_url,revenue_30) %>% 
  arrange(-revenue_30)

#2.Top 10 owner with largest revenue per year

airbnb_top10_owner <- airbnb_sg_revenue %>%
  group_by(host_name,property_type) %>% 
  drop_na() %>% 
  summarize(annual_revenue=sum(revenue_1year)) %>% 
  arrange(-annual_revenue)
## `summarise()` has grouped output by 'host_name'. You can override using the
## `.groups` argument.
view(airbnb_top10_owner)
head(airbnb_top10_owner,10)
## # A tibble: 10 × 3
## # Groups:   host_name [9]
##    host_name                   property_type             annual_revenue
##    <chr>                       <chr>                              <dbl>
##  1 Beary Best Hostel           Room in hostel                  40273406
##  2 Syafiqah                    Entire rental unit               5309290
##  3 Jay                         Entire condo                     1723805
##  4 Angie                       Entire serviced apartment        1409603
##  5 Jan                         Entire serviced apartment         945114
##  6 Yiyi                        Entire serviced apartment         517180
##  7 Jay                         Entire rental unit                480343
##  8 Sweet Home                  Room in hotel                     398544
##  9 Nicolas                     Entire rental unit                365000
## 10 Pan Pacific Serviced Suites Entire serviced apartment         339388

#3.Top 10 property type that most people like to booked (per year)

airbnb_top10propertytype <- airbnb_sg_revenue %>% 
  group_by(property_type) %>% 
  drop_na() %>% 
  summarize(annual_revenue=sum(revenue_1year)) %>% 
  arrange(-annual_revenue)
view(airbnb_top10propertytype)
head(airbnb_top10propertytype,10)
## # A tibble: 10 × 2
##    property_type                      annual_revenue
##    <chr>                                       <dbl>
##  1 Room in hostel                           40495346
##  2 Entire rental unit                        7812132
##  3 Entire serviced apartment                 4157526
##  4 Entire condo                              2647697
##  5 Room in hotel                              727468
##  6 Room in boutique hotel                     321846
##  7 Private room in hostel                     262537
##  8 Private room in serviced apartment         227059
##  9 Private room in condo                      110595
## 10 Room in aparthotel                         100415

#4.Statistical information

summary(airbnb_sg_revenue)
##        id            listing_url            name           description       
##  Min.   :7.161e+04   Length:1782        Length:1782        Length:1782       
##  1st Qu.:2.042e+07   Class :character   Class :character   Class :character  
##  Median :3.600e+07   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :1.728e+17                                                           
##  3rd Qu.:5.338e+07                                                           
##  Max.   :9.113e+17                                                           
##                                                                              
##  neighborhood_overview    host_id            host_url        
##  Length:1782           Min.   :    23666   Length:1782       
##  Class :character      1st Qu.: 23336011   Class :character  
##  Mode  :character      Median : 68059127   Mode  :character  
##                        Mean   :138568769                     
##                        3rd Qu.:211210302                     
##                        Max.   :515748996                     
##                                                              
##   host_name          host_since        host_location      host_response_time
##  Length:1782        Length:1782        Length:1782        Length:1782       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  host_response_rate host_is_superhost host_identity_verified property_type     
##  Length:1782        Mode :logical     Mode:logical           Length:1782       
##  Class :character   FALSE:919         TRUE:1782              Class :character  
##  Mode  :character   TRUE :181                                Mode  :character  
##                     NA's :682                                                  
##                                                                                
##                                                                                
##                                                                                
##   room_type          accommodates    bathrooms_text        bedrooms    
##  Length:1782        Min.   : 1.000   Length:1782        Min.   :1.000  
##  Class :character   1st Qu.: 1.000   Class :character   1st Qu.:1.000  
##  Mode  :character   Median : 2.000   Mode  :character   Median :1.000  
##                     Mean   : 2.884                      Mean   :1.324  
##                     3rd Qu.: 4.000                      3rd Qu.:1.000  
##                     Max.   :16.000                      Max.   :4.000  
##                                                         NA's   :815    
##       beds        availability_30 availability_60 availability_90
##  Min.   : 1.000   Min.   : 0.00   Min.   : 0.00   Min.   : 0.00  
##  1st Qu.: 1.000   1st Qu.: 1.00   1st Qu.:12.00   1st Qu.:27.00  
##  Median : 1.000   Median :20.00   Median :45.00   Median :72.00  
##  Mean   : 1.873   Mean   :16.51   Mean   :36.73   Mean   :57.67  
##  3rd Qu.: 2.000   3rd Qu.:29.00   3rd Qu.:59.00   3rd Qu.:88.00  
##  Max.   :36.000   Max.   :30.00   Max.   :60.00   Max.   :90.00  
##  NA's   :32                                                      
##  availability_365 review_scores_cleanliness review_scores_location
##  Min.   :  0.0    Min.   :1.000             Min.   :1.000         
##  1st Qu.:163.0    1st Qu.:4.330             1st Qu.:4.600         
##  Median :317.0    Median :4.700             Median :4.880         
##  Mean   :250.2    Mean   :4.525             Mean   :4.715         
##  3rd Qu.:363.0    3rd Qu.:5.000             3rd Qu.:5.000         
##  Max.   :365.0    Max.   :5.000             Max.   :5.000         
##                                             NA's   :1             
##    new_price          revenue_30        revenue_60        revenue_90     
##  Min.   :    22.0   Min.   :      0   Min.   :      0   Min.   :      0  
##  1st Qu.:    69.0   1st Qu.:    118   1st Qu.:    123   1st Qu.:    138  
##  Median :   148.0   Median :   1380   Median :   1974   Median :   2700  
##  Mean   :   297.3   Mean   :   4889   Mean   :   8920   Mean   :  12776  
##  3rd Qu.:   243.0   3rd Qu.:   4200   3rd Qu.:   6641   3rd Qu.:   9000  
##  Max.   :109999.0   Max.   :3299970   Max.   :6599940   Max.   :9899910  
##                                                                          
##  revenue_1year     
##  Min.   :       0  
##  1st Qu.:     324  
##  Median :    6779  
##  Mean   :   52474  
##  3rd Qu.:   32485  
##  Max.   :40149635  
## 

Combining review table with listing table to gain insights of dirty ratings in airbnb

airbnb_sg_revenue_review <- inner_join(airbnb_sg_revenue,review_listing,by=c('id'='listing_id'))
view(airbnb_sg_revenue_review)

List of airbnb_operator with dirtiest rating

airbnb_top10dirtiest <- airbnb_sg_revenue_review %>%
  group_by(host_id,host_url,host_name) %>% 
  summarize(average_rating_cleanliness = mean(review_scores_cleanliness)) %>% 
  arrange(average_rating_cleanliness)
## `summarise()` has grouped output by 'host_id', 'host_url'. You can override
## using the `.groups` argument.

Data Visualization #Top 10 Listings by Revenue Per Month

viz_top10month<- head(airbnb_top10_month,10)
ggplot(data=viz_top10month,aes(x=reorder(as.factor(id),-revenue_30),y=revenue_30))+
  geom_bar(stat = "identity",position="dodge",fill="blue")+
  labs(title="Top 10 Listings by Revenue per Month",x="Listing Id",y="Revenue Per Month")+
  theme_minimal()+
  scale_fill_brewer(palette = 'Set3')

#Top 10 Property Type By Revenue Per Year

viz_top10property <- head(airbnb_top10propertytype,10)
ggplot(data=viz_top10property,aes(x=reorder(str_wrap(property_type,width=10),-annual_revenue),y=annual_revenue))+geom_bar(stat = "identity",position="dodge",fill="green")+
  labs(title="Top 10 Property Type by Revenue per Year",x="Property Type",y="Revenue Per Year")+
  theme_minimal()+scale_fill_brewer(palette = 'Set3')