import numpy as np
from PIL import Image
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import seaborn as sns
import sys
import warnings
import datetime
from os import path
from matplotlib import style
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

#warnings.filterwarnings("ignore")
# Base directory for this project's files; the bare expression below echoes
# the value when run as a notebook cell.
pwd = "/Users/bryansullivan/Desktop/Data Visualization/"
pwd
'/Users/bryansullivan/Desktop/Data Visualization/'
# Derive the CSV location from the base directory so the absolute path is
# spelled out in only one place (the result is the same string as before).
filename = os.path.join(pwd, 'AirBNB', 'AB_NYC_2019.csv')

# NOTE(review): the file is decoded as latin1 -- confirm this matches the
# CSV's real encoding, otherwise non-ASCII listing names will be garbled.
airbnb = pd.read_csv(filename, encoding='latin1')

# Preview the first five rows to sanity-check the load.
print(airbnb.head(5))
     id                                              name  host_id  \
0  2539                Clean & quiet apt home by the park     2787   
1  2595                             Skylit Midtown Castle     2845   
2  3647               THE VILLAGE OF HARLEM....NEW YORK !     4632   
3  3831                   Cozy Entire Floor of Brownstone     4869   
4  5022  Entire Apt: Spacious Studio/Loft by central park     7192   

     host_name neighbourhood_group neighbourhood  latitude  longitude  \
0         John            Brooklyn    Kensington  40.64749  -73.97237   
1     Jennifer           Manhattan       Midtown  40.75362  -73.98377   
2    Elisabeth           Manhattan        Harlem  40.80902  -73.94190   
3  LisaRoxanne            Brooklyn  Clinton Hill  40.68514  -73.95976   
4        Laura           Manhattan   East Harlem  40.79851  -73.94399   

         room_type  price  minimum_nights  number_of_reviews last_review  \
0     Private room    149               1                  9    10/19/18   
1  Entire home/apt    225               1                 45     5/21/19   
2     Private room    150               3                  0         NaN   
3  Entire home/apt     89               1                270      7/5/19   
4  Entire home/apt     80              10                  9    11/19/18   

   reviews_per_month  calculated_host_listings_count  availability_365  
0               0.21                               6               365  
1               0.38                               2               355  
2                NaN                               1               365  
3               4.64                               1               194  
4               0.10                               1                 0  
# Display the dtype pandas assigned to each column.
airbnb.dtypes
id                                  int64
name                               object
host_id                             int64
host_name                          object
neighbourhood_group                object
neighbourhood                      object
latitude                          float64
longitude                         float64
room_type                          object
price                               int64
minimum_nights                      int64
number_of_reviews                   int64
last_review                        object
reviews_per_month                 float64
calculated_host_listings_count      int64
availability_365                    int64
dtype: object
# Count the missing values in each column.
airbnb.isnull().sum()
id                                    0
name                                 16
host_id                               0
host_name                            21
neighbourhood_group                   0
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                                 0
minimum_nights                        0
number_of_reviews                     0
last_review                       10052
reviews_per_month                 10052
calculated_host_listings_count        0
availability_365                      0
dtype: int64
# Fill missing reviews_per_month with the column mean.
# NOTE(review): in the preview above the row with NaN here has
# number_of_reviews == 0 -- confirm the mean (rather than 0) is the intended fill.
airbnb["reviews_per_month"] = airbnb["reviews_per_month"].fillna(
    airbnb["reviews_per_month"].mean()
)

# Parse the review dates, then forward-fill the gaps. The result is assigned
# back explicitly: an in-place fillna on `airbnb.last_review` acts on a
# temporary Series and is not guaranteed to update the DataFrame, and
# `fillna(method=...)` is deprecated in favour of `.ffill()`.
airbnb["last_review"] = pd.to_datetime(airbnb["last_review"]).ffill()

# Any column that still has gaps (including a leading NaT that forward-fill
# cannot reach) is filled with its most frequent value.
for column in airbnb.columns:
    if airbnb[column].isnull().any():
        airbnb[column] = airbnb[column].fillna(airbnb[column].mode()[0])

# Re-check: every count should now be zero.
airbnb.isnull().sum()
id                                0
name                              0
host_id                           0
host_name                         0
neighbourhood_group               0
neighbourhood                     0
latitude                          0
longitude                         0
room_type                         0
price                             0
minimum_nights                    0
number_of_reviews                 0
last_review                       0
reviews_per_month                 0
calculated_host_listings_count    0
availability_365                  0
dtype: int64
# The ten neighbourhoods with the most listings (value_counts sorts descending).
data = airbnb.neighbourhood.value_counts()[:10]

# Reverse both axes' data so the smallest count is passed to barh first.
x = list(data.index)[::-1]
y = list(data.values)[::-1]

plt.figure(figsize=(12, 8))
plt.title("Most Popular Neighbourhood")
plt.ylabel("Neighbourhood Area")
plt.xlabel("Number of guest Who host in this Area")

plt.barh(x, y)
<BarContainer object of 10 artists>
png

png

# Correlate every numeric predictor with price. The frame is narrowed to
# numeric columns first because corrwith raises on text/datetime columns in
# pandas >= 2.0 (older versions silently skipped them).
airbnb.select_dtypes(include="number").drop(columns="price").corrwith(
    airbnb["price"]
).plot.barh(
    figsize=(10, 8),
    title='Correlation with Response Variable',
    fontsize=15,
    grid=True,
)
<matplotlib.axes._subplots.AxesSubplot at 0x1178f4ac8>
png

png

# Concatenate all listing names into one space-separated string.
# str.join is linear in the total length, unlike repeated `+=`.
text = ' '.join(airbnb.name)

# Load the mask image as an array; the context manager closes the file
# handle once the pixel data has been copied.
with Image.open("/Users/bryansullivan/Desktop/Data Visualization/AirBNB/Statue 2.jpg") as mask_image:
    mask = np.array(mask_image)

# Build a word cloud of the 50 most frequent words, shaped by the mask.
WC = WordCloud(background_color='black', mask=mask, max_words=50)
WC.generate(text)

plt.figure(figsize=(15, 10))
plt.imshow(WC, interpolation="bilinear")
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()
png

png

# `import urllib` alone does not load the `request` submodule; import it
# explicitly so urlopen is available regardless of what else was imported.
import urllib.request

plt.figure(figsize=(10, 8))

# Download the NYC neighbourhood map and decode it; the context manager
# closes the HTTP response once the image has been read.
with urllib.request.urlopen('https://upload.wikimedia.org/wikipedia/commons/e/ec/Neighbourhoods_New_York_City_Map.PNG') as response:
    nyc_img = plt.imread(response)

# Stretch the image over the longitude/latitude box given by `extent` so the
# scatter points drawn on the same axes line up with it.
plt.imshow(nyc_img, zorder=0, extent=[-74.258, -73.7, 40.49, 40.92])
ax = plt.gca()

# Keep only listings priced below 500. This rebinds `airbnb`, so any code
# that runs afterwards sees the filtered frame.
airbnb = airbnb[airbnb.price < 500]

# One dot per listing, coloured by price, drawn above the map (zorder=5).
airbnb.plot(kind='scatter', x='longitude', y='latitude', label='id', c='price', ax=ax,
            cmap=plt.get_cmap('jet'), colorbar=True, alpha=0.4, zorder=5, s=5)
plt.legend()
plt.show()
png

png