Data Visualization

Introduction

The purpose of this report is to demonstrate how Python can be used to explore and visualize real-world Airbnb data for Washington, DC. The analysis draws on the listings.csv, neighbourhoods.csv, and reviews.csv files, and all code and figures below are generated directly inside this R Markdown document using Python chunks.

Heatmap of Reviews by Year and Month

The following heatmap shows how Airbnb review activity in Washington, DC varies across years and months.
Each cell represents the total number of reviews in a given month and year, with redder (warmer) colors indicating more reviews and bluer (cooler) colors indicating fewer; review counts serve here as a proxy for booking and guest activity.
This visualization helps highlight seasonal patterns and longer-term trends in demand over time.

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Work from the data folder so the relative CSV file name below resolves.
folder = r"C:\Users\rusud\OneDrive\Desktop\R_DataFiles\Washington DC"
os.chdir(folder)

# Load the reviews, parse the review date (values that cannot be parsed become
# NaT and are dropped), then derive the calendar year and month of each review.
reviews = (
    pd.read_csv("reviews.csv", low_memory=False)
    .assign(date=lambda df: pd.to_datetime(df['date'], errors='coerce'))
    .dropna(subset=['date'])
    .assign(
        year=lambda df: df['date'].dt.year,
        month=lambda df: df['date'].dt.month,
    )
)

# One row per (year, month) pair holding the number of reviews in that month.
agg = reviews.groupby(['year', 'month']).size().reset_index(name='count')

# Reshape into a year x month grid. Reindexing guarantees all twelve month
# columns exist; months without any review are shown as 0.
all_months = list(range(1, 13))
all_years = sorted(agg['year'].unique())
heat = agg.set_index(['year', 'month'])['count'].unstack('month')
heat = heat.reindex(index=all_years, columns=all_months).fillna(0)

# Most recent year first, so it ends up as the top row of the heatmap.
heat = heat.sort_index(ascending=False)

# Cell annotations: review counts as whole numbers with thousands separators.
annot_text = np.vectorize(lambda v: f"{int(round(v)):,}")(heat.values)

plt.figure(figsize=(12, 8))
ax = sns.heatmap(
    heat,
    cmap='coolwarm',
    linewidths=.4,
    linecolor='white',
    cbar_kws={'label': 'Number of Reviews'},
    annot=annot_text,
    fmt='',  # annotations are already formatted strings
    # Force exactly one tick per column/row. With seaborn's default "auto"
    # setting the ticks may be thinned to avoid overlapping labels, and the
    # set_*ticklabels calls below (one label per column/row) would then be
    # given more labels than there are ticks.
    xticklabels=True,
    yticklabels=True,
)
ax.set_title('Heatmap of the Number of Reviews by Year and Month', fontsize=16, pad=12)
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Year', fontsize=12)
# Keep both sets of tick labels horizontal.
ax.set_xticklabels([str(m) for m in heat.columns], rotation=0)
ax.set_yticklabels([str(y) for y in heat.index], rotation=0)
plt.tight_layout()

# Write the current figure into the data folder as a 300-dpi PNG.
outdir = Path(folder)
outdir.mkdir(exist_ok=True, parents=True)
outfile = outdir.joinpath("heatmap_year_month_reviews.png")
plt.gcf().savefig(outfile, bbox_inches='tight', dpi=300)
print(f"Saved to: {outfile}")

## Saved to: C:\Users\rusud\OneDrive\Desktop\R_DataFiles\Washington DC\heatmap_year_month_reviews.png

# Display the figure after it has been saved.
plt.show()

Interactive Map of Listings with Price Bands

This interactive map displays Washington, DC Airbnb listings as clustered circle markers, colored by nightly price bands. The underlying heat layer highlights areas with the highest concentration of listings, weighted by price, revealing key hotspots. Together, these views help compare affordability and spatial density across neighborhoods in a single, intuitive visualization.

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Path to your data folder
data_folder = r"C:\Users\rusud\OneDrive\Desktop\R_DataFiles\Washington DC"

# Read CSVs using full paths (no os.chdir)
listings = pd.read_csv(os.path.join(data_folder, "listings.csv"), low_memory=False)
reviews  = pd.read_csv(os.path.join(data_folder, "reviews.csv"),  low_memory=False)

# Clean price: strip "$" and "," and convert to numbers; anything that still
# cannot be parsed becomes NaN.
price = listings['price'].astype(str).str.replace(r'[\$,]', '', regex=True)
listings['price_num'] = pd.to_numeric(price, errors='coerce')
# Fill missing prices with the median price. The result is assigned back to the
# column instead of calling fillna(..., inplace=True) on the column selection:
# that chained in-place form raises a FutureWarning and stops modifying the
# DataFrame in pandas 3.0.
listings['price_num'] = listings['price_num'].fillna(listings['price_num'].median())

## <string>:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
## The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
## 
## For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.

# Parse review dates, drop rows whose date cannot be parsed, and derive the
# month and weekday of each review.
reviews = (
    reviews
    .assign(date=lambda df: pd.to_datetime(df['date'], errors='coerce'))
    .dropna(subset=['date'])
    .assign(
        month=lambda df: df['date'].dt.month,        # 1..12
        dow=lambda df: df['date'].dt.dayofweek,      # 0=Mon..6=Sun
    )
)

# Attach price to each review; reviews whose listing has no price row count as 0
rev_val = pd.merge(
    reviews,
    listings[['id', 'price_num']].rename(columns={'id': 'listing_id'}),
    on='listing_id',
    how='left',
)
rev_val = rev_val.assign(value=rev_val['price_num'].fillna(0))

# Build Month × Weekday matrix: sum the value per (month, weekday) pair, spread
# the weekdays into columns, then make sure all 12 rows and 7 columns exist.
month_week = rev_val.groupby(['month', 'dow'])['value'].sum()
month_week = month_week.unstack('dow', fill_value=0)
month_week = month_week.reindex(index=range(1, 13), columns=range(7), fill_value=0)

print("Check totals per month (should be > 0):")

## Check totals per month (should be > 0):

print(month_week.sum(axis=1))

## month
## 1     3389667.0
## 2     3462128.0
## 3     5739821.0
## 4     6216559.0
## 5     6507832.0
## 6     5947869.0
## 7     6194675.0
## 8     6028095.0
## 9     5162715.0
## 10    5589351.0
## 11    4091749.0
## 12    3665167.0
## dtype: float64


# (keep your plotting code below here)

3. Stacked Bar Chart: Total Listings Value by Month and Weekday

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# 1) Load data (independent of previous chunks)
data_folder = r"C:\Users\rusud\OneDrive\Desktop\R_DataFiles\Washington DC"
os.chdir(data_folder)

listings = pd.read_csv("listings.csv", low_memory=False)
reviews  = pd.read_csv("reviews.csv", low_memory=False)

# 2) Clean price in listings -> price_num
# Strip "$" and "," and convert to numbers; anything that still cannot be
# parsed becomes NaN.
price = listings['price'].astype(str).str.replace(r'[\$,]', '', regex=True)
listings['price_num'] = pd.to_numeric(price, errors='coerce')
# Fill missing prices with the median price. The result is assigned back to the
# column instead of calling fillna(..., inplace=True) on the column selection:
# that chained in-place form raises a FutureWarning and stops modifying the
# DataFrame in pandas 3.0.
listings['price_num'] = listings['price_num'].fillna(listings['price_num'].median())

## <string>:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
## The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
## 
## For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.

# 3) Clean review dates + add month & weekday
# Rows whose date cannot be parsed are dropped before the parts are derived.
reviews = (
    reviews
    .assign(date=lambda df: pd.to_datetime(df['date'], errors='coerce'))
    .dropna(subset=['date'])
    .assign(
        month=lambda df: df['date'].dt.month,        # 1..12
        dow=lambda df: df['date'].dt.dayofweek,      # 0=Mon..6=Sun
    )
)

# 4) Attach listing price to each review
# Reviews whose listing has no price row get a value of 0.
rev_val = pd.merge(
    reviews,
    listings[['id', 'price_num']].rename(columns={'id': 'listing_id'}),
    on='listing_id',
    how='left',
)
rev_val = rev_val.assign(value=rev_val['price_num'].fillna(0))

# 5) Month x Weekday matrix of total value
# Sum per (month, weekday) pair, spread the weekdays into columns, then make
# sure all 12 month rows and 7 weekday columns are present.
month_week = rev_val.groupby(['month', 'dow'])['value'].sum()
month_week = month_week.unstack('dow', fill_value=0)
month_week = month_week.reindex(index=range(1, 13), columns=range(7), fill_value=0)

# Quick sanity check so you know it's not all zeros
print("Check totals per month (should be > 0):")

## Check totals per month (should be > 0):

print(month_week.sum(axis=1))

## month
## 1     3389667.0
## 2     3462128.0
## 3     5739821.0
## 4     6216559.0
## 5     6507832.0
## 6     5947869.0
## 7     6194675.0
## 8     6028095.0
## 9     5162715.0
## 10    5589351.0
## 11    4091749.0
## 12    3665167.0
## dtype: float64

# 6) Plot stacked bar chart
# Weekday labels in dayofweek order (0=Mon .. 6=Sun), each paired with the
# fill colour used for that weekday's bar segments.
day_labels, colors = map(list, zip(
    ('Mon', '#1f77b4'),
    ('Tue', '#d62728'),
    ('Wed', '#2ca02c'),
    ('Thu', '#7f7f7f'),
    ('Fri', '#9467bd'),
    ('Sat', '#ffbf00'),
    ('Sun', '#8c564b'),
))

def _millions(x, pos):
    return f'${x/1_000_000:.1f}M'

fig, ax = plt.subplots(figsize=(13, 6))

# Running height of each month's stack: every weekday's bars are drawn on top
# of the total accumulated from the weekdays before it.
bottom = np.zeros(12)
for d, (label, color) in enumerate(zip(day_labels, colors)):
    vals = month_week[d].to_numpy()
    ax.bar(month_week.index, vals, bottom=bottom, label=label, color=color, width=0.85)
    bottom += vals

ax.set_title('Washington DC — Total Listings Value by Month (Stacked by Weekday)', pad=12)
ax.set(
    xlabel='Month',
    ylabel='Total Listings Value ($)',
    xticks=range(1, 13),  # one tick per month
)
# Show the y axis in millions of dollars.
ax.yaxis.set_major_formatter(FuncFormatter(_millions))
ax.grid(axis='y', alpha=.25, linestyle='--')
ax.legend(ncol=1, frameon=True, loc='upper right')

plt.tight_layout()
plt.show()

Share of Total Listing Value by Weekday

This donut chart summarizes how total estimated listing value is distributed across the days of the week. Each slice shows both the percentage share and dollar value contributed by that weekday. This helps identify which days drive the highest overall revenue in Washington, DC’s Airbnb market.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# ---------- build rev_val if it doesn't exist ----------
# Fallback for running this chunk without the earlier ones: rebuild the
# review-level table (one row per review with the listing's price as "value").
if 'rev_val' not in globals():
    # ensure price numeric
    if 'price_num' not in listings.columns:
        # Raw string for the regex: '\$' in a plain string literal is an
        # invalid escape sequence and triggers a warning on recent Python.
        price = listings['price'].astype(str).str.replace(r'[\$,]', '', regex=True)
        listings['price_num'] = pd.to_numeric(price, errors='coerce')
        # Impute missing prices with the median, as the other chunks do, so the
        # totals here do not depend on which chunk created price_num.
        listings['price_num'] = listings['price_num'].fillna(listings['price_num'].median())
    # parse date + derive DOW
    reviews['date'] = pd.to_datetime(reviews['date'], errors='coerce')
    reviews = reviews.dropna(subset=['date']).copy()
    reviews['dow'] = reviews['date'].dt.dayofweek  # 0=Mon..6=Sun
    # merge price into reviews as "value"
    rev_val = reviews.merge(
        listings[['id', 'price_num']].rename(columns={'id':'listing_id'}),
        on='listing_id',
        how='left',
        validate='m:1'  # fail loudly if a listing id appears more than once
    )
    # Reviews whose listing has no price row contribute 0.
    rev_val['value'] = rev_val['price_num'].fillna(0)

# ---------- aggregate ----------
# Total value per weekday (0=Mon..6=Sun); weekdays without reviews count as 0.
by_dow = rev_val.groupby('dow')['value'].sum().reindex(range(7), fill_value=0)
day_labels = ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']
values = by_dow.values
total  = values.sum()


def _slice_label(p):
    """Label a wedge with its percentage share and the matching dollar amount."""
    return f"{p:.1f}%\n(${p*total/100:,.0f})"


# ---------- plot ----------
fig, ax = plt.subplots(figsize=(7.5, 7.5))
wedges, texts, autotexts = ax.pie(
    values,
    labels=day_labels,
    autopct=_slice_label,
    startangle=90,
    pctdistance=0.72,
    labeldistance=1.08,
    wedgeprops={'width': 0.36, 'edgecolor': 'white'},
)

# donut hole
centre = plt.Circle((0, 0), 0.60, fc='white')
ax.add_artist(centre)

# center label: overall total in the middle of the ring
ax.text(
    0, 0,
    f'Total Value\n${total:,.0f}',
    ha='center', va='center',
    fontsize=12, weight='bold'
)

ax.set_title('Washington DC — Share of Total Listings Value by Weekday', pad=14)
# Equal axis scaling keeps the ring circular.
ax.axis('equal')

## (np.float64(-1.0999999605714577), np.float64(1.0999998450732795), np.float64(-1.1000000039647957), np.float64(1.1000000001889165))

plt.tight_layout()

# ---------- save ----------
# Write the current figure into the data folder as a 300-dpi PNG.
outdir = Path(r"C:\Users\rusud\OneDrive\Desktop\R_DataFiles\Washington DC")
outdir.mkdir(exist_ok=True, parents=True)
outfile = outdir.joinpath("donut_share_value_by_weekday.png")
plt.gcf().savefig(outfile, bbox_inches="tight", dpi=300)
print(f"Saved to: {outfile}")

## Saved to: C:\Users\rusud\OneDrive\Desktop\R_DataFiles\Washington DC\donut_share_value_by_weekday.png

# Display the figure after it has been saved.
plt.show()

5. Monthly Weekday Trends (Multi-line Plot)

This visualization compares how estimated listing value varies across months for each day of the week. Each line represents a weekday (Mon–Sun), allowing us to see which days consistently generate higher value throughout the year and how weekend vs. weekday demand shifts seasonally.

## <string>:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
## The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.
## 
## For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.

## Saved to: C:\Users\rusud\OneDrive\Desktop\R_DataFiles\Washington DC\multiline_month_weekday_value.png

```

Data Visualization – Python

Rusudan (Russo) Tsereteli

11/09/2025