# load libraries
library(tidyverse)
library(dplyr)
library(readxl)
# load dataset
# Point the session at the folder that holds the dataset.
# NOTE(review): this absolute path is machine-specific; an RStudio project or
# a relative path would make the script portable -- confirm before changing.
setwd("C:/Users/chesl/Desktop/DATA110")

# Choose the reader from the file's actual format. The ".csv" name suggests
# delimited text, which read_excel() cannot parse; excel_format() returns NA
# for anything that is not an xls/xlsx workbook, in which case read_csv() is
# used instead.
data_file <- "Airbnb_DC_25.csv"
df <- if (is.na(excel_format(data_file))) {
  read_csv(data_file)
} else {
  read_excel(data_file)
}

# clean dataset
# Keep listings priced under 6000 to remove outliers. filter() also drops rows
# where the condition evaluates to NA, so listings with a missing price are
# removed as well.
subset <- df |>
  filter(price < 6000)
# graph no.1
# Scatterplot of listing coordinates (longitude vs. latitude), coloured by
# room type. Points are semi-transparent so overlapping listings stay visible.
plot <- subset |>
  ggplot(aes(x = longitude, y = latitude, colour = room_type)) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Map of Airbnb locations in Washington, D.C., 2025",
    x = "Longitude",
    y = "Latitude",
    colour = "Room type",
    caption = "dataset source: Airbnb_DC_25.csv"
  )
plot

# Interpretation:
# This scatterplot shows the geographic position of all Airbnb locations in
# Washington, D.C. (in 2025), with their type of room indicated by color. The
# points create a contour of the Washington city map. Airbnb locations are
# most densely concentrated in Capitol Hill and downtown D.C., whereas the
# empty spots correspond to the location of parks and the Potomac River. The
# most common Airbnb room type is entire home/apartment, followed by private
# rooms.
# graph no.2
# Scatterplot of price against number of reviews, coloured by room type.
# The vertical line marks the mean price of the filtered listings.
plot2 <- ggplot(
  subset,
  aes(x = price, y = number_of_reviews, colour = room_type)
) +
  geom_point(alpha = 0.5) +
  geom_vline(xintercept = mean(subset$price)) +
  labs(
    title = "Airbnb locations in Washington, D.C., 2025, by price and number of reviews",
    x = "Price",
    y = "Number of reviews",
    colour = "Room type",
    caption = "dataset source: Airbnb_DC_25.csv"
  )
plot2

# Interpretation:
# This scatterplot shows the relationship between price and number of reviews
# for Airbnb locations in Washington, D.C. The majority of locations cost
# between $100–$250 per night, with private rooms costing less on average than
# entire homes/apartments. Interestingly, homes with the most reviews tend to
# cost less than the most expensive locations.