This dataset contains information on Airbnb listings in New York City in 2019. Airbnb is one of the main forms of accommodation for travelers visiting not only New York but many other parts of the world. This dataset has 16 columns and 48,895 observations. It was originally compiled and made available by Inside Airbnb, an independent website that scrapes Airbnb’s website to provide public data on Airbnb listings in cities around the world. This dataset allows us to compare statistics for a wide range of Airbnbs with different price points and locations. First, I am going to look at the average price of Airbnb listings in each neighborhood group. Then I will look at the relationship between the number of reviews and the prices.
‘price’ : The price per night of the Airbnb listing, in USD
‘number_of_reviews’ : The total number of reviews for the Airbnb listing
‘neighbourhood_group’ : The neighborhood group in which the Airbnb listing is located
‘latitude’/‘longitude’ : The coordinates of the Airbnb listing
‘room_type’ : The type of room available for the Airbnb listing
‘minimum_nights’ : The minimum number of nights required to book the listing
‘host_name’ : The name of the person who owns the property
The data for this dataset were collected through Airbnb’s platform. I chose this dataset because I am interested in seeing some of these Airbnbs when I visit New York. This dataset also provides valuable insights into the dynamics of short-term rental markets in urban areas like New York City. An article on npr.org discusses the potential impact of new short-term rental regulations on the Airbnb market in New York City. These new regulations are set to go into effect in early 2023. This could have an impact on this dataset, as it could result in a decrease in the number of Airbnb listings available in the city, which could in turn affect the accuracy and completeness of the dataset. Reference: https://www.npr.org/2022/12/28/1145709106/nyc-could-lose-10-000-airbnb-listings-because-of-new-short-term-rental-regulatio
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tmap)
## Warning: package 'tmap' was built under R version 4.2.3
library(tmaptools)
## Warning: package 'tmaptools' was built under R version 4.2.3
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.2.3
library(sf)
## Warning: package 'sf' was built under R version 4.2.3
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(leaflet.extras)
## Warning: package 'leaflet.extras' was built under R version 4.2.3
library(dplyr)
library(rio)
## Warning: package 'rio' was built under R version 4.2.3
library(sp)
## Warning: package 'sp' was built under R version 4.2.3
# Load the 2019 NYC Airbnb listings from "airbnb_ny19.csv".
#
# The project folder below exists only on the original author's machine.
# Switch into it only when it is present; everywhere else the script keeps the
# current working directory instead of stopping with setwd()'s
# "cannot change working directory" error.
project_dir <- "C:/Users/amani/OneDrive/Desktop/Data110"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
airbnb_ny19 <- read_csv("airbnb_ny19.csv")
## Rows: 48895 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): name, host_name, neighbourhood_group, neighbourhood, room_type, la...
## dbl (10): id, host_id, latitude, longitude, price, minimum_nights, number_of...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(magick)
## Warning: package 'magick' was built under R version 4.2.3
## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fontconfig, x11
library(jpeg)
# Read the picture stored in "airbnb.jpeg" and draw it on a blank canvas.
airbnb_image <- readJPEG("airbnb.jpeg")

# Empty unit-square plot: nothing drawn (type = "n"), no axes, labels or box.
plot(
  x = 0:1,
  y = 0:1,
  type = "n",
  xlab = "",
  ylab = "",
  xaxt = "n",
  yaxt = "n",
  bty = "n"
)

# Place the image in the middle of the canvas, leaving a 0.2 margin all round.
rasterImage(
  airbnb_image,
  xleft = 0.2,
  ybottom = 0.2,
  xright = 0.8,
  ytop = 0.8
)
# Remove incomplete rows, but only with respect to the fields this report
# describes and uses. A bare drop_na() discards a listing when ANY of the 16
# columns is missing, so gaps in columns that are never analysed would silently
# shrink the sample and could bias the averages below.
# NOTE(review): presumably some unused columns (e.g. review-date fields) are NA
# for listings without reviews -- confirm with colSums(is.na(airbnb_ny19)).
airbnb_ny19 <- airbnb_ny19 %>%
  drop_na(
    name, host_name, neighbourhood_group, latitude, longitude,
    room_type, price, minimum_nights, number_of_reviews
  )

# Mean nightly price for each neighbourhood group (one row per group).
price_by_neighborhood <- airbnb_ny19 %>%
  group_by(neighbourhood_group) %>%
  summarize(avg_price = mean(price))

# Bar chart of the averages. The bars are drawn straight from the summary table
# computed above, so the table and the figure cannot drift apart.
ggplot(
  data = price_by_neighborhood,
  aes(x = neighbourhood_group, y = avg_price)
) +
  geom_col(fill = "#FFC0CB") +
  labs(
    title = "Average Airbnb Listing Price by Neighborhood Group",
    x = "Neighborhood Group",
    y = "Average Price ($)"
  ) +
  theme_minimal()
# Two-column table holding only the nightly price and the review count.
airbnb_reviews_price <- select(airbnb_ny19, price, number_of_reviews)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:rio':
##
## export
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Scatter plot of nightly price against review count for every listing,
# then converted to an interactive plotly widget.
p <- airbnb_ny19 %>%
  ggplot(mapping = aes(x = number_of_reviews, y = price)) +
  geom_point(color = "purple", alpha = 0.6) +
  theme_bw() +
  labs(
    x = "Number of Reviews",
    y = "Price"
  )

ggplotly(p)
library(dplyr)
library(ggplot2)
# Keep listings priced above 50 that have more than 100 reviews.
airbnb_ny19_filtered <- filter(
  airbnb_ny19,
  price > 50,
  number_of_reviews > 100
)

# Price vs. review count for the filtered listings, with a straight-line trend.
# The y axis is on a log10 scale.
ggplot(
  data = airbnb_ny19_filtered,
  mapping = aes(x = price, y = number_of_reviews)
) +
  geom_point(color = "#FF5733", size = 2, alpha = 0.2) +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  scale_y_log10() +
  labs(
    title = "Price vs. Number of Reviews on Airbnb Listings in NYC",
    x = "Price (USD)",
    y = "Number of Reviews"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Simple linear regression of review count on nightly price, fitted on the
# filtered listings.
# The fit is stored under its own name: assigning it back to
# `airbnb_ny19_filtered` would replace the filtered data frame with an lm
# object, so the data could no longer be reused and re-running this line
# would fail.
reviews_price_model <- lm(number_of_reviews ~ price, data = airbnb_ny19_filtered)
summary(reviews_price_model)
##
## Call:
## lm(formula = number_of_reviews ~ price, data = airbnb_ny19_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.17 -42.97 -18.50 26.38 374.53
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.14752 2.14484 79.795 < 2e-16 ***
## price -0.05732 0.01332 -4.302 1.75e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60.56 on 2706 degrees of freedom
## Multiple R-squared: 0.006792, Adjusted R-squared: 0.006425
## F-statistic: 18.51 on 1 and 2706 DF, p-value: 1.755e-05
# Continuous purple colour scale spanning the observed listing prices.
mypal <- colorNumeric(palette = "Purples", domain = airbnb_ny19$price)

# Popup HTML showing each listing's name and price.
# paste0() is used so that no separator is inserted between the pieces; with
# paste() the price was rendered as "$ 149" instead of "$149".
# NOTE(review): listing names are inserted into the popup HTML unescaped --
# consider escaping them if names may contain markup.
mypopup <- paste0(
  "<strong>Name:</strong> ", airbnb_ny19$name,
  "<br><strong>Price:</strong> $", airbnb_ny19$price
)

# Interactive map: one circle per listing, filled according to its price.
# The data frame is bound once in leaflet(), so the marker layer and the legend
# both refer to its columns through formulas.
mymap <- leaflet(data = airbnb_ny19) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    lat = ~latitude,
    lng = ~longitude,
    fillColor = ~mypal(price),
    color = "pink",
    fillOpacity = 0.9,
    weight = 1,
    radius = 3,
    popup = mypopup
  ) %>%
  addLegend(
    pal = mypal,
    values = ~price,
    position = "bottomright",
    title = "Price",
    labFormat = labelFormat(prefix = "$")
  )

mymap