This dataset contains information on Airbnb listings in New York City in 2019. It includes various variables related to the listings, such as the price, number of reviews, neighbourhood, minimum nights, longitude and latitude, room type, availability, and many other variables.
‘price’ : The price per night of the Airbnb listing in USD
‘number_of_reviews’ : The total number of reviews for the Airbnb listing
‘neighbourhood_group’ : The broader neighbourhood group (e.g. Manhattan or Brooklyn) in which the Airbnb listing is located
‘latitude’/‘longitude’ : The coordinates of the Airbnb listing
‘room_type’ : The type of room available for the Airbnb listing
‘minimum_nights’ : The minimum number of nights required to book the listing
‘host_name’ : The name of the host, i.e. the person who owns the property
library(tidyverse)
library(ggfortify)
library(htmltools)
library(plotly)
library(ggplot2)
# Work from the course folder so the CSV can be referenced by its bare name.
# NOTE(review): this absolute, machine-specific path makes the script
# non-portable -- confirm or adjust it before running on another computer.
setwd("C:/Users/amani/OneDrive/Desktop/Data110")

# Load the listings file into a tibble, then preview its first six rows.
airbnb_ny19 <- read_csv(file = "airbnb_ny19.csv")
airbnb_ny19 %>% head(n = 6)
## # A tibble: 6 × 16
## id name host_id host_…¹ neigh…² neigh…³ latit…⁴ longi…⁵ room_…⁶ price
## <dbl> <chr> <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 2539 Clean & q… 2787 John Brookl… Kensin… 40.6 -74.0 Privat… 149
## 2 2595 Skylit Mi… 2845 Jennif… Manhat… Midtown 40.8 -74.0 Entire… 225
## 3 3647 THE VILLA… 4632 Elisab… Manhat… Harlem 40.8 -73.9 Privat… 150
## 4 3831 Cozy Enti… 4869 LisaRo… Brookl… Clinto… 40.7 -74.0 Entire… 89
## 5 5022 Entire Ap… 7192 Laura Manhat… East H… 40.8 -73.9 Entire… 80
## 6 5099 Large Coz… 7322 Chris Manhat… Murray… 40.7 -74.0 Entire… 200
## # … with 6 more variables: minimum_nights <dbl>, number_of_reviews <dbl>,
## # last_review <chr>, reviews_per_month <dbl>,
## # calculated_host_listings_count <dbl>, availability_365 <dbl>, and
## # abbreviated variable names ¹host_name, ²neighbourhood_group,
## # ³neighbourhood, ⁴latitude, ⁵longitude, ⁶room_type
# Print per-column summary statistics for the whole data set.
airbnb_ny19 %>% summary()
## id name host_id host_name
## Min. : 2539 Length:48895 Min. : 2438 Length:48895
## 1st Qu.: 9471945 Class :character 1st Qu.: 7822033 Class :character
## Median :19677284 Mode :character Median : 30793816 Mode :character
## Mean :19017143 Mean : 67620011
## 3rd Qu.:29152178 3rd Qu.:107434423
## Max. :36487245 Max. :274321313
##
## neighbourhood_group neighbourhood latitude longitude
## Length:48895 Length:48895 Min. :40.50 Min. :-74.24
## Class :character Class :character 1st Qu.:40.69 1st Qu.:-73.98
## Mode :character Mode :character Median :40.72 Median :-73.96
## Mean :40.73 Mean :-73.95
## 3rd Qu.:40.76 3rd Qu.:-73.94
## Max. :40.91 Max. :-73.71
##
## room_type price minimum_nights number_of_reviews
## Length:48895 Min. : 0.0 Min. : 1.00 Min. : 0.00
## Class :character 1st Qu.: 69.0 1st Qu.: 1.00 1st Qu.: 1.00
## Mode :character Median : 106.0 Median : 3.00 Median : 5.00
## Mean : 152.7 Mean : 7.03 Mean : 23.27
## 3rd Qu.: 175.0 3rd Qu.: 5.00 3rd Qu.: 24.00
## Max. :10000.0 Max. :1250.00 Max. :629.00
##
## last_review reviews_per_month calculated_host_listings_count
## Length:48895 Min. : 0.010 Min. : 1.000
## Class :character 1st Qu.: 0.190 1st Qu.: 1.000
## Mode :character Median : 0.720 Median : 1.000
## Mean : 1.373 Mean : 7.144
## 3rd Qu.: 2.020 3rd Qu.: 2.000
## Max. :58.500 Max. :327.000
## NA's :10052
## availability_365
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 45.0
## Mean :112.8
## 3rd Qu.:227.0
## Max. :365.0
##
# Mean listing price for each neighbourhood group: one row per group with
# the columns `neighbourhood_group` and `avg_price`.
avg_price <- summarise(
  group_by(airbnb_ny19, neighbourhood_group),
  avg_price = mean(price)
)
library(dplyr)
# Bar chart of the mean price per neighbourhood group. Bar heights come
# straight from `avg_price`, and each group gets its own pink-toned fill.
ggplot(
  data = avg_price,
  mapping = aes(
    x = neighbourhood_group,
    y = avg_price,
    fill = neighbourhood_group
  )
) +
  geom_col(width = 0.5) +
  # Unnamed colours are handed out to the groups in the scale's own order.
  scale_fill_manual(
    values = c("#FF69F5", "#FF69B4", "#FF1493", "#C71585", "#FFB6C1")
  ) +
  ggtitle("Average Price by Neighborhood Group") +
  xlab("Neighborhood Group") +
  ylab("Average Price") +
  labs(fill = "Neighborhood Group") +
  theme_minimal() +
  # Applied after theme_minimal() so these two settings override it.
  theme(
    panel.background = element_rect(fill = "Grey"),
    legend.position = "bottom"
  )
library(ggplot2)
# Keep only listings that have at least one review. The result overwrites
# `airbnb_ny19`, so every later use of it sees this reduced data set.
airbnb_ny19 <- filter(airbnb_ny19, number_of_reviews > 0)
# Scatter plot of price against review count, coloured by neighbourhood group.
ggplot(
  airbnb_ny19,
  aes(number_of_reviews, price, colour = neighbourhood_group)
) +
  geom_point(size = 2, alpha = 0.7) +
  # Limits set on the scale discard points outside 0-2000 (ggplot2 warns
  # about the removed rows) rather than merely zooming the view.
  scale_y_continuous(limits = c(0, 2000)) +
  ggtitle("Airbnb Listings in New York City") +
  xlab("Number of Reviews") +
  ylab("Price (USD)") +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 12),
    legend.title = element_blank(),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 20, face = "bold")
  )
## Warning: Removed 35 rows containing missing values (`geom_point()`).
# Plot each listing at its longitude/latitude: colour shows the neighbourhood
# group and point size grows with price. Low alpha keeps overlapping points
# readable.
ggplot(
  airbnb_ny19,
  aes(
    longitude,
    latitude,
    colour = neighbourhood_group,
    size = price
  )
) +
  geom_point(alpha = 0.2) +
  scale_colour_manual(
    values = c("darkgreen", "purple", "yellow", "white", "blue")
  ) +
  scale_size_continuous(name = "Price", range = c(1, 10)) +
  labs(
    title = "Airbnb Listings in New York City",
    colour = "Neighborhood Group"
  ) +
  theme_dark(base_size = 10)