The dataset that is used in this analysis is taken from http://insideairbnb.com/get-the-data/ from “Singapore” section
Library
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(readr)
library(skimr)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(dplyr)
library(ggplot2)
library(stringr)
Import Data
#Import the airbnb singapore dataset into RStudio
airbnb_sg <- read_csv("D:/DATA SCIENCE/Data Science Project/Dataset/airbnb_singapore.csv")
## Rows: 3443 Columns: 75
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (31): listing_url, last_scraped, source, name, description, neighborhood...
## dbl (37): id, scrape_id, host_id, host_listings_count, host_total_listings_c...
## lgl (7): host_is_superhost, host_has_profile_pic, host_identity_verified, b...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Import the review for airbnb singapore dataset into RStudio
review_listing <- read_csv("D:/DATA SCIENCE/Data Science Project/R AirBnb Project/reviews.csv")
## Rows: 34825 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): date, reviewer_name, comments
## dbl (3): listing_id, id, reviewer_id
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Checking The Dataset Structure
#View the structure of airbnb dataset
str(airbnb_sg)
## spec_tbl_df [3,443 × 75] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ id : num [1:3443] 71609 71896 71903 275343 275344 ...
## $ listing_url : chr [1:3443] "https://www.airbnb.com/rooms/71609" "https://www.airbnb.com/rooms/71896" "https://www.airbnb.com/rooms/71903" "https://www.airbnb.com/rooms/275343" ...
## $ scrape_id : num [1:3443] 2.02e+13 2.02e+13 2.02e+13 2.02e+13 2.02e+13 ...
## $ last_scraped : chr [1:3443] "6/29/2023" "6/28/2023" "6/29/2023" "6/28/2023" ...
## $ source : chr [1:3443] "city scrape" "city scrape" "city scrape" "city scrape" ...
## $ name : chr [1:3443] "Villa in Singapore · ★4.44 · 2 bedrooms · 3 beds · 1 private bath" "Home in Singapore · ★4.16 · 1 bedroom · 1 bed · Shared half-bath" "Home in Singapore · ★4.41 · 1 bedroom · 2 beds · Shared half-bath" "Rental unit in Singapore · ★4.40 · 1 bedroom · 1 bed · 2 shared baths" ...
## $ description : chr [1:3443] "For 3 rooms.Book room 1&2 and room 4<br /><br /><b>The space</b><br />Landed Homestay Room for Rental. Between "| __truncated__ "<b>The space</b><br />Vocational Stay Deluxe Bedroom in Singapore.(Near Airport) <br /> <br />Located Between "| __truncated__ "Like your own home, 24hrs access.<br /><br /><b>The space</b><br />Vocational Stay Deluxe Bedroom in Singapore."| __truncated__ "**IMPORTANT NOTES: READ BEFORE YOU BOOK! <br />==Since this is an HDB Flat tourists are NOT ALLOWED unless hav"| __truncated__ ...
## $ neighborhood_overview : chr [1:3443] NA NA "Quiet and view of the playground with exercise tracks with access to neighbourhood Simwi Estate." NA ...
## $ picture_url : chr [1:3443] "https://a0.muscache.com/pictures/24453191/35803acb_original.jpg" "https://a0.muscache.com/pictures/2440674/ac4f4442_original.jpg" "https://a0.muscache.com/pictures/568743/7bc623e9_original.jpg" "https://a0.muscache.com/pictures/miso/Hosting-275343/original/0239b8f0-a116-4118-889c-41f23df6303d.jpeg" ...
## $ host_id : num [1:3443] 367042 367042 367042 1439258 1439258 ...
## $ host_url : chr [1:3443] "https://www.airbnb.com/users/show/367042" "https://www.airbnb.com/users/show/367042" "https://www.airbnb.com/users/show/367042" "https://www.airbnb.com/users/show/1439258" ...
## $ host_name : chr [1:3443] "Belinda" "Belinda" "Belinda" "Kay" ...
## $ host_since : chr [1:3443] "1/29/2011" "1/29/2011" "1/29/2011" "11/24/2011" ...
## $ host_location : chr [1:3443] "Singapore" "Singapore" "Singapore" "Singapore" ...
## $ host_about : chr [1:3443] "Hi My name is Belinda -Housekeeper \n\nI would like to welcome you to my \"Homestay Website\" \n\n\nAccomodatio"| __truncated__ "Hi My name is Belinda -Housekeeper \n\nI would like to welcome you to my \"Homestay Website\" \n\n\nAccomodatio"| __truncated__ "Hi My name is Belinda -Housekeeper \n\nI would like to welcome you to my \"Homestay Website\" \n\n\nAccomodatio"| __truncated__ "K2 Guesthouse is designed for guests who want a truly local experience with local people. Experience eating loc"| __truncated__ ...
## $ host_response_time : chr [1:3443] "within an hour" "within an hour" "within an hour" "N/A" ...
## $ host_response_rate : chr [1:3443] "100%" "100%" "100%" "N/A" ...
## $ host_acceptance_rate : chr [1:3443] "100%" "100%" "100%" "N/A" ...
## $ host_is_superhost : logi [1:3443] NA NA NA NA NA NA ...
## $ host_thumbnail_url : chr [1:3443] "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_small" "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_small" "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_small" "https://a0.muscache.com/im/pictures/user/7245b0a9-27fa-4759-9fb3-59ae8299e8a3.jpg?aki_policy=profile_small" ...
## $ host_picture_url : chr [1:3443] "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_x_medium" "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_x_medium" "https://a0.muscache.com/im/users/367042/profile_pic/1382521511/original.jpg?aki_policy=profile_x_medium" "https://a0.muscache.com/im/pictures/user/7245b0a9-27fa-4759-9fb3-59ae8299e8a3.jpg?aki_policy=profile_x_medium" ...
## $ host_neighbourhood : chr [1:3443] "Tampines" "Tampines" "Tampines" "Bukit Merah" ...
## $ host_listings_count : num [1:3443] 5 5 5 51 51 5 7 51 51 7 ...
## $ host_total_listings_count : num [1:3443] 15 15 15 56 56 15 8 56 56 8 ...
## $ host_verifications : chr [1:3443] "['email', 'phone']" "['email', 'phone']" "['email', 'phone']" "['email', 'phone']" ...
## $ host_has_profile_pic : logi [1:3443] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ host_identity_verified : logi [1:3443] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ neighbourhood : chr [1:3443] NA NA "Singapore, Singapore" NA ...
## $ neighbourhood_cleansed : chr [1:3443] "Tampines" "Tampines" "Tampines" "Bukit Merah" ...
## $ neighbourhood_group_cleansed : chr [1:3443] "East Region" "East Region" "East Region" "Central Region" ...
## $ latitude : num [1:3443] 1.35 1.35 1.35 1.29 1.29 ...
## $ longitude : num [1:3443] 104 104 104 104 104 ...
## $ property_type : chr [1:3443] "Private room in villa" "Private room in home" "Private room in home" "Private room in rental unit" ...
## $ room_type : chr [1:3443] "Private room" "Private room" "Private room" "Private room" ...
## $ accommodates : num [1:3443] 3 1 2 1 1 4 2 1 1 2 ...
## $ bathrooms : logi [1:3443] NA NA NA NA NA NA ...
## $ bathrooms_text : chr [1:3443] "1 private bath" "Shared half-bath" "Shared half-bath" "2 shared baths" ...
## $ bedrooms : num [1:3443] NA NA NA NA NA 3 NA NA NA NA ...
## $ beds : num [1:3443] 3 1 2 1 1 5 1 1 1 1 ...
## $ amenities : chr [1:3443] "[\"Kitchen\", \"Keypad\", \"Shared hot tub\", \"Children\\u2019s books and toys\", \"Wifi\", \"Essentials\", \""| __truncated__ "[\"Shower gel\", \"Kitchen\", \"Cleaning products\", \"Keypad\", \"Drying rack for clothing\", \"Shared hot tub"| __truncated__ "[\"Kitchen\", \"Keypad\", \"Shared hot tub\", \"Wifi\", \"Essentials\", \"Dryer\", \"Heating\", \"Hot water\", "| __truncated__ "[\"Kitchen\", \"Keypad\", \"Bed linens\", \"Carbon monoxide alarm\", \"Wifi\", \"Hot water kettle\", \"Air cond"| __truncated__ ...
## $ price : chr [1:3443] "$139.00" "$70.00" "$81.00" "$55.00" ...
## $ minimum_nights : num [1:3443] 92 92 92 90 60 92 92 60 60 92 ...
## $ maximum_nights : num [1:3443] 1125 1125 1125 999 999 ...
## $ minimum_minimum_nights : num [1:3443] 92 92 92 90 60 92 92 60 60 92 ...
## $ maximum_minimum_nights : num [1:3443] 92 92 92 90 60 92 92 60 60 92 ...
## $ minimum_maximum_nights : num [1:3443] 1125 1125 1125 999 999 ...
## $ maximum_maximum_nights : num [1:3443] 1125 1125 1125 999 999 ...
## $ minimum_nights_avg_ntm : num [1:3443] 92 92 92 90 60 92 92 60 60 92 ...
## $ maximum_nights_avg_ntm : num [1:3443] 1125 1125 1125 999 999 ...
## $ calendar_updated : logi [1:3443] NA NA NA NA NA NA ...
## $ has_availability : logi [1:3443] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ availability_30 : num [1:3443] 28 30 28 0 0 28 30 27 0 30 ...
## $ availability_60 : num [1:3443] 58 60 58 0 0 58 60 27 0 60 ...
## $ availability_90 : num [1:3443] 88 90 88 26 0 88 90 27 24 90 ...
## $ availability_365 : num [1:3443] 363 365 363 301 57 363 365 235 299 365 ...
## $ calendar_last_scraped : chr [1:3443] "6/29/2023" "6/28/2023" "6/29/2023" "6/28/2023" ...
## $ number_of_reviews : num [1:3443] 20 24 47 22 15 12 133 17 5 81 ...
## $ number_of_reviews_ltm : num [1:3443] 0 0 0 1 2 0 0 1 2 0 ...
## $ number_of_reviews_l30d : num [1:3443] 0 0 0 0 0 0 0 0 1 0 ...
## $ first_review : chr [1:3443] "12/19/2011" "7/30/2011" "5/4/2011" "4/20/2013" ...
## $ last_review : chr [1:3443] "1/17/2020" "10/13/2019" "1/9/2020" "8/13/2022" ...
## $ review_scores_rating : num [1:3443] 4.44 4.16 4.41 4.4 4.54 4.83 4.43 3.62 4.5 4.43 ...
## $ review_scores_accuracy : num [1:3443] 4.37 4.22 4.39 4.16 4.64 4.67 4.33 3.63 4.5 4.45 ...
## $ review_scores_cleanliness : num [1:3443] 4 4.09 4.52 4.26 4.21 4.75 4.16 4 4.25 4.41 ...
## $ review_scores_checkin : num [1:3443] 4.63 4.43 4.63 4.47 4.64 4.58 4.5 4.5 4.75 4.71 ...
## $ review_scores_communication : num [1:3443] 4.78 4.43 4.64 4.42 4.57 4.67 4.66 4.06 4.75 4.76 ...
## $ review_scores_location : num [1:3443] 4.26 4.17 4.5 4.53 4.64 4.33 4.52 4 4.25 4.64 ...
## $ review_scores_value : num [1:3443] 4.32 4.04 4.36 4.63 4.43 4.45 4.39 3.88 4.5 4.55 ...
## $ license : chr [1:3443] NA NA NA NA ...
## $ instant_bookable : logi [1:3443] FALSE FALSE FALSE TRUE TRUE TRUE ...
## $ calculated_host_listings_count : num [1:3443] 5 5 5 51 51 5 7 51 51 7 ...
## $ calculated_host_listings_count_entire_homes : num [1:3443] 0 0 0 2 2 0 1 2 2 1 ...
## $ calculated_host_listings_count_private_rooms: num [1:3443] 5 5 5 49 49 5 6 49 49 6 ...
## $ calculated_host_listings_count_shared_rooms : num [1:3443] 0 0 0 0 0 0 0 0 0 0 ...
## $ reviews_per_month : num [1:3443] 0.14 0.17 0.32 0.18 0.11 0.09 0.96 0.13 0.04 0.68 ...
## - attr(*, "spec")=
## .. cols(
## .. id = col_double(),
## .. listing_url = col_character(),
## .. scrape_id = col_double(),
## .. last_scraped = col_character(),
## .. source = col_character(),
## .. name = col_character(),
## .. description = col_character(),
## .. neighborhood_overview = col_character(),
## .. picture_url = col_character(),
## .. host_id = col_double(),
## .. host_url = col_character(),
## .. host_name = col_character(),
## .. host_since = col_character(),
## .. host_location = col_character(),
## .. host_about = col_character(),
## .. host_response_time = col_character(),
## .. host_response_rate = col_character(),
## .. host_acceptance_rate = col_character(),
## .. host_is_superhost = col_logical(),
## .. host_thumbnail_url = col_character(),
## .. host_picture_url = col_character(),
## .. host_neighbourhood = col_character(),
## .. host_listings_count = col_double(),
## .. host_total_listings_count = col_double(),
## .. host_verifications = col_character(),
## .. host_has_profile_pic = col_logical(),
## .. host_identity_verified = col_logical(),
## .. neighbourhood = col_character(),
## .. neighbourhood_cleansed = col_character(),
## .. neighbourhood_group_cleansed = col_character(),
## .. latitude = col_double(),
## .. longitude = col_double(),
## .. property_type = col_character(),
## .. room_type = col_character(),
## .. accommodates = col_double(),
## .. bathrooms = col_logical(),
## .. bathrooms_text = col_character(),
## .. bedrooms = col_double(),
## .. beds = col_double(),
## .. amenities = col_character(),
## .. price = col_character(),
## .. minimum_nights = col_double(),
## .. maximum_nights = col_double(),
## .. minimum_minimum_nights = col_double(),
## .. maximum_minimum_nights = col_double(),
## .. minimum_maximum_nights = col_double(),
## .. maximum_maximum_nights = col_double(),
## .. minimum_nights_avg_ntm = col_double(),
## .. maximum_nights_avg_ntm = col_double(),
## .. calendar_updated = col_logical(),
## .. has_availability = col_logical(),
## .. availability_30 = col_double(),
## .. availability_60 = col_double(),
## .. availability_90 = col_double(),
## .. availability_365 = col_double(),
## .. calendar_last_scraped = col_character(),
## .. number_of_reviews = col_double(),
## .. number_of_reviews_ltm = col_double(),
## .. number_of_reviews_l30d = col_double(),
## .. first_review = col_character(),
## .. last_review = col_character(),
## .. review_scores_rating = col_double(),
## .. review_scores_accuracy = col_double(),
## .. review_scores_cleanliness = col_double(),
## .. review_scores_checkin = col_double(),
## .. review_scores_communication = col_double(),
## .. review_scores_location = col_double(),
## .. review_scores_value = col_double(),
## .. license = col_character(),
## .. instant_bookable = col_logical(),
## .. calculated_host_listings_count = col_double(),
## .. calculated_host_listings_count_entire_homes = col_double(),
## .. calculated_host_listings_count_private_rooms = col_double(),
## .. calculated_host_listings_count_shared_rooms = col_double(),
## .. reviews_per_month = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
#View the structure of review dataset
str(review_listing)
## spec_tbl_df [34,825 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ listing_id : num [1:34825] 71609 71609 71609 71609 71609 ...
## $ id : num [1:34825] 793880 1731810 2162194 2190615 3221837 ...
## $ date : chr [1:34825] "12/19/2011" "7/17/2012" "9/1/2012" "9/4/2012" ...
## $ reviewer_id : num [1:34825] 1456140 1804182 3113461 1432123 2759938 ...
## $ reviewer_name: chr [1:34825] "Max" "Zac" "Zahra" "Helmut" ...
## $ comments : chr [1:34825] "The rooms were clean and tidy. Beds very comfortable.\r<br/>Thank you Belinda." "Good space and quite an interesting home in a quiet location - a lovely park across the road which is rare in S"| __truncated__ "It was a comfortable place. Belinda was a kind and helpful woman who helped us have a nice accommodation there "| __truncated__ "We are four mature age travellers and stayed for one night. We had a wonderfully warm reception on arrival and "| __truncated__ ...
## - attr(*, "spec")=
## .. cols(
## .. listing_id = col_double(),
## .. id = col_double(),
## .. date = col_character(),
## .. reviewer_id = col_double(),
## .. reviewer_name = col_character(),
## .. comments = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
Selecting the column that will be used in the analysis
new_airbnb_sg <- airbnb_sg %>%
select(
"id",
"listing_url",
"name",
"description",
"neighborhood_overview",
"host_id",
"host_url",
"host_name",
"host_since",
"host_location",
"host_response_time",
"host_response_rate",
"host_is_superhost",
"host_identity_verified",
"property_type",
"room_type",
"accommodates",
"bathrooms_text",
"bedrooms",
"beds",
"price",
"availability_30",
"availability_60",
"availability_90",
"availability_365",
"review_scores_cleanliness",
"review_scores_location"
)
Cleaning the data by removing certain rows that have NA and converting column Price into Numeric for further analysis
new_clean_airbnb_sg <-
#converting price column from character to numeric
mutate(new_airbnb_sg,new_price=parse_number(new_airbnb_sg$price)) %>%
filter(host_identity_verified !=FALSE) %>%
drop_na(review_scores_cleanliness) %>%
clean_names()
#Viewing the clean dataset
View(new_clean_airbnb_sg)
In order to calculate revenue, I created list of new columns : -Revenue_30 -Revenue_60 -Revenue_90 -Revenue_365
The purpose of these new columns was to calculate revenue of airbnb operators.The assumption for the revenue was the price multiply by the non available nights.
airbnb_sg_revenue <-
mutate(new_clean_airbnb_sg,revenue_30=new_price*(30-availability_30),
revenue_60=new_price * (60-availability_60),
revenue_90=new_price * (90-availability_90),
revenue_1year = new_price * (365-availability_365)
) %>%
select(-price)
view(airbnb_sg_revenue)
The Analysis Phase #1.Top 10 airbnb by revenue per month
airbnb_top10_month <- airbnb_sg_revenue %>%
select(id,listing_url,revenue_30) %>%
arrange(-revenue_30)
#2.Top 10 owner with largest revenue per year
airbnb_top10_owner <- airbnb_sg_revenue %>%
group_by(host_name,property_type) %>%
drop_na() %>%
summarize(annual_revenue=sum(revenue_1year)) %>%
arrange(-annual_revenue)
## `summarise()` has grouped output by 'host_name'. You can override using the
## `.groups` argument.
view(airbnb_top10_owner)
head(airbnb_top10_owner,10)
## # A tibble: 10 × 3
## # Groups: host_name [9]
## host_name property_type annual_revenue
## <chr> <chr> <dbl>
## 1 Beary Best Hostel Room in hostel 40273406
## 2 Syafiqah Entire rental unit 5309290
## 3 Jay Entire condo 1723805
## 4 Angie Entire serviced apartment 1409603
## 5 Jan Entire serviced apartment 945114
## 6 Yiyi Entire serviced apartment 517180
## 7 Jay Entire rental unit 480343
## 8 Sweet Home Room in hotel 398544
## 9 Nicolas Entire rental unit 365000
## 10 Pan Pacific Serviced Suites Entire serviced apartment 339388
#3.Top 10 property type that most people like to booked (per year)
airbnb_top10propertytype <- airbnb_sg_revenue %>%
group_by(property_type) %>%
drop_na() %>%
summarize(annual_revenue=sum(revenue_1year)) %>%
arrange(-annual_revenue)
view(airbnb_top10propertytype)
head(airbnb_top10propertytype,10)
## # A tibble: 10 × 2
## property_type annual_revenue
## <chr> <dbl>
## 1 Room in hostel 40495346
## 2 Entire rental unit 7812132
## 3 Entire serviced apartment 4157526
## 4 Entire condo 2647697
## 5 Room in hotel 727468
## 6 Room in boutique hotel 321846
## 7 Private room in hostel 262537
## 8 Private room in serviced apartment 227059
## 9 Private room in condo 110595
## 10 Room in aparthotel 100415
#4.Statistical information
summary(airbnb_sg_revenue)
## id listing_url name description
## Min. :7.161e+04 Length:1782 Length:1782 Length:1782
## 1st Qu.:2.042e+07 Class :character Class :character Class :character
## Median :3.600e+07 Mode :character Mode :character Mode :character
## Mean :1.728e+17
## 3rd Qu.:5.338e+07
## Max. :9.113e+17
##
## neighborhood_overview host_id host_url
## Length:1782 Min. : 23666 Length:1782
## Class :character 1st Qu.: 23336011 Class :character
## Mode :character Median : 68059127 Mode :character
## Mean :138568769
## 3rd Qu.:211210302
## Max. :515748996
##
## host_name host_since host_location host_response_time
## Length:1782 Length:1782 Length:1782 Length:1782
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## host_response_rate host_is_superhost host_identity_verified property_type
## Length:1782 Mode :logical Mode:logical Length:1782
## Class :character FALSE:919 TRUE:1782 Class :character
## Mode :character TRUE :181 Mode :character
## NA's :682
##
##
##
## room_type accommodates bathrooms_text bedrooms
## Length:1782 Min. : 1.000 Length:1782 Min. :1.000
## Class :character 1st Qu.: 1.000 Class :character 1st Qu.:1.000
## Mode :character Median : 2.000 Mode :character Median :1.000
## Mean : 2.884 Mean :1.324
## 3rd Qu.: 4.000 3rd Qu.:1.000
## Max. :16.000 Max. :4.000
## NA's :815
## beds availability_30 availability_60 availability_90
## Min. : 1.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1.000 1st Qu.: 1.00 1st Qu.:12.00 1st Qu.:27.00
## Median : 1.000 Median :20.00 Median :45.00 Median :72.00
## Mean : 1.873 Mean :16.51 Mean :36.73 Mean :57.67
## 3rd Qu.: 2.000 3rd Qu.:29.00 3rd Qu.:59.00 3rd Qu.:88.00
## Max. :36.000 Max. :30.00 Max. :60.00 Max. :90.00
## NA's :32
## availability_365 review_scores_cleanliness review_scores_location
## Min. : 0.0 Min. :1.000 Min. :1.000
## 1st Qu.:163.0 1st Qu.:4.330 1st Qu.:4.600
## Median :317.0 Median :4.700 Median :4.880
## Mean :250.2 Mean :4.525 Mean :4.715
## 3rd Qu.:363.0 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :365.0 Max. :5.000 Max. :5.000
## NA's :1
## new_price revenue_30 revenue_60 revenue_90
## Min. : 22.0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 69.0 1st Qu.: 118 1st Qu.: 123 1st Qu.: 138
## Median : 148.0 Median : 1380 Median : 1974 Median : 2700
## Mean : 297.3 Mean : 4889 Mean : 8920 Mean : 12776
## 3rd Qu.: 243.0 3rd Qu.: 4200 3rd Qu.: 6641 3rd Qu.: 9000
## Max. :109999.0 Max. :3299970 Max. :6599940 Max. :9899910
##
## revenue_1year
## Min. : 0
## 1st Qu.: 324
## Median : 6779
## Mean : 52474
## 3rd Qu.: 32485
## Max. :40149635
##
Combining review table with listing table to gain insights of dirty ratings in airbnb
airbnb_sg_revenue_review <- inner_join(airbnb_sg_revenue,review_listing,by=c('id'='listing_id'))
view(airbnb_sg_revenue_review)
List of airbnb_operator with dirtiest rating
airbnb_top10dirtiest <- airbnb_sg_revenue_review %>%
group_by(host_id,host_url,host_name) %>%
summarize(average_rating_cleanliness = mean(review_scores_cleanliness)) %>%
arrange(average_rating_cleanliness)
## `summarise()` has grouped output by 'host_id', 'host_url'. You can override
## using the `.groups` argument.
Data Visualization #Top 10 Listings by Revenue Per Month
viz_top10month<- head(airbnb_top10_month,10)
ggplot(data=viz_top10month,aes(x=reorder(as.factor(id),-revenue_30),y=revenue_30))+
geom_bar(stat = "identity",position="dodge",fill="blue")+
labs(title="Top 10 Listings by Revenue per Month",x="Listing Id",y="Revenue Per Month")+
theme_minimal()+
scale_fill_brewer(palette = 'Set3')
#Top 10 Property Type By Revenue Per Year
viz_top10property <- head(airbnb_top10propertytype,10)
ggplot(data=viz_top10property,aes(x=reorder(str_wrap(property_type,width=10),-annual_revenue),y=annual_revenue))+geom_bar(stat = "identity",position="dodge",fill="green")+
labs(title="Top 10 Property Type by Revenue per Year",x="Property Type",y="Revenue Per Year")+
theme_minimal()+scale_fill_brewer(palette = 'Set3')