import numpy as np
from PIL import Image
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import seaborn as sns
import sys
import warnings
import datetime
from os import path
from matplotlib import style
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
#warnings.filterwarnings("ignore")
# Base directory of the data-visualization project. The bare name on the
# second line echoes the path as the notebook cell's output.
pwd = "/Users/bryansullivan/Desktop/Data Visualization/"
pwd
'/Users/bryansullivan/Desktop/Data Visualization/'
# Build the CSV path from the project base directory (`pwd`) rather than
# repeating the absolute path; the resulting string is the same as before.
filename = os.path.join(pwd, "AirBNB", "AB_NYC_2019.csv")
# The file is decoded as latin1, matching how it was read originally.
airbnb = pd.read_csv(filename, encoding="latin1")
# Preview the first five rows to sanity-check the load.
print(airbnb.head(5))
id name host_id \
0 2539 Clean & quiet apt home by the park 2787
1 2595 Skylit Midtown Castle 2845
2 3647 THE VILLAGE OF HARLEM....NEW YORK ! 4632
3 3831 Cozy Entire Floor of Brownstone 4869
4 5022 Entire Apt: Spacious Studio/Loft by central park 7192
host_name neighbourhood_group neighbourhood latitude longitude \
0 John Brooklyn Kensington 40.64749 -73.97237
1 Jennifer Manhattan Midtown 40.75362 -73.98377
2 Elisabeth Manhattan Harlem 40.80902 -73.94190
3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976
4 Laura Manhattan East Harlem 40.79851 -73.94399
room_type price minimum_nights number_of_reviews last_review \
0 Private room 149 1 9 10/19/18
1 Entire home/apt 225 1 45 5/21/19
2 Private room 150 3 0 NaN
3 Entire home/apt 89 1 270 7/5/19
4 Entire home/apt 80 10 9 11/19/18
reviews_per_month calculated_host_listings_count availability_365
0 0.21 6 365
1 0.38 2 355
2 NaN 1 365
3 4.64 1 194
4 0.10 1 0
# Show the dtype pandas inferred for each column of the loaded frame.
airbnb.dtypes
id int64
name object
host_id int64
host_name object
neighbourhood_group object
neighbourhood object
latitude float64
longitude float64
room_type object
price int64
minimum_nights int64
number_of_reviews int64
last_review object
reviews_per_month float64
calculated_host_listings_count int64
availability_365 int64
dtype: object
# Count the missing values in every column before any cleaning is applied.
airbnb.isna().sum()
id 0
name 16
host_id 0
host_name 21
neighbourhood_group 0
neighbourhood 0
latitude 0
longitude 0
room_type 0
price 0
minimum_nights 0
number_of_reviews 0
last_review 10052
reviews_per_month 10052
calculated_host_listings_count 0
availability_365 0
dtype: int64
# --- Missing-value handling -------------------------------------------------
# reviews_per_month: replace missing values with the column mean.
airbnb["reviews_per_month"] = airbnb["reviews_per_month"].fillna(
    airbnb["reviews_per_month"].mean()
)

# last_review: parse to datetime, then forward-fill gaps from the previous row.
# The result is assigned back to the column instead of using
# `airbnb.last_review.fillna(method="ffill", inplace=True)`: that form mutates
# a possibly-temporary Series (a no-op under pandas copy-on-write) and relies
# on the deprecated `method=` argument.
airbnb["last_review"] = pd.to_datetime(airbnb["last_review"]).ffill()

# Every column that still has nulls (including a leading last_review value
# that forward-fill cannot reach) is filled with its most frequent value.
for column in airbnb.columns:
    if airbnb[column].isnull().any():
        airbnb[column] = airbnb[column].fillna(airbnb[column].mode()[0])

# Re-check: every count should now be zero.
airbnb.isnull().sum()
id 0
name 0
host_id 0
host_name 0
neighbourhood_group 0
neighbourhood 0
latitude 0
longitude 0
room_type 0
price 0
minimum_nights 0
number_of_reviews 0
last_review 0
reviews_per_month 0
calculated_host_listings_count 0
availability_365 0
dtype: int64
# Horizontal bar chart of the ten neighbourhoods with the most listings.
data = airbnb.neighbourhood.value_counts().head(10)

# value_counts() sorts descending; reverse both axes so the largest count is
# passed to barh last.
x = list(data.index)[::-1]
y = list(data.values)[::-1]

plt.figure(figsize=(12, 8))
plt.title("Most Popular Neighbourhood")
plt.ylabel("Neighbourhood Area")
plt.xlabel("Number of guest Who host in this Area")
plt.barh(x, y)
<BarContainer object of 10 artists>
png
# Correlation of every numeric feature with the response variable `price`.
# Numeric columns are selected explicitly: the frame also holds text and
# datetime columns, and pandas >= 2.0 no longer drops those silently inside
# corrwith (it raises instead). Older versions ignored them, so the plotted
# values are unchanged.
(
    airbnb.drop(columns="price")
    .select_dtypes(include="number")
    .corrwith(airbnb["price"])
    .plot.barh(
        figsize=(10, 8),
        title='Correlation with Response Variable',
        fontsize=15,
        grid=True,
    )
)
<matplotlib.axes._subplots.AxesSubplot at 0x1178f4ac8>
png
# Word cloud of listing names, drawn inside a mask image.

# Join all listing names into one space-separated string. str.join is linear,
# whereas repeated `+=` in a loop is quadratic in the worst case. The only
# difference from the loop version is the absence of a trailing space.
text = " ".join(airbnb["name"])

# Load the mask image; the context manager closes the file handle once the
# pixel data has been copied into the array.
with Image.open("/Users/bryansullivan/Desktop/Data Visualization/AirBNB/Statue 2.jpg") as mask_image:
    mask = np.array(mask_image)

# Render at most 50 words on a black background, constrained to the mask.
WC = WordCloud(background_color='black', mask=mask, max_words=50)
WC.generate(text)

plt.figure(figsize=(15, 10))
plt.imshow(WC, interpolation="bilinear")
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()
png
# Scatter of listings over a New York City map image, coloured by price.

# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` is loaded.
import urllib.request

plt.figure(figsize=(10, 8))

# Download the background map and decode it while the connection is open;
# the context manager closes the HTTP response afterwards.
with urllib.request.urlopen('https://upload.wikimedia.org/wikipedia/commons/e/ec/Neighbourhoods_New_York_City_Map.PNG') as response:
    nyc_img = plt.imread(response)

# `extent` is [x_min, x_max, y_min, y_max], in the same units as the
# longitude/latitude columns plotted on top of it.
plt.imshow(nyc_img, zorder=0, extent=[-74.258, -73.7, 40.49, 40.92])
ax = plt.gca()

# NOTE: this rebinds the module-level `airbnb` to listings priced under 500,
# so any code that runs after this point sees the filtered frame.
airbnb = airbnb[airbnb.price < 500]

airbnb.plot(
    kind='scatter',
    x='longitude',
    y='latitude',
    label='id',
    c='price',
    ax=ax,
    cmap=plt.get_cmap('jet'),
    colorbar=True,
    alpha=0.4,
    zorder=5,
    s=5,
)
plt.legend()
plt.show()
png