import os
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'D:/Anaconda3/Library/plugins/platforms'
import warnings

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import StrMethodFormatter

warnings.filterwarnings("ignore")
# Location of the listings CSV, and the folder prefix used when saving output.
path = "U:/"
filename = 'U:/listings.csv'

# Load only the columns named below; every other column in the CSV is skipped.
df = pd.read_csv(
    filename,
    usecols=[
        'neighbourhood_group',
        'neighbourhood',
        'room_type',
        'price',
        'latitude',
        'longitude',
        'minimum_nights',
        'number_of_reviews',
    ],
)
# Per-neighbourhood summary: number of rows plus total and average price.
neighbourhood_stats = {'neighbourhood': ['count'], 'price': ['sum', 'mean']}
x = df.groupby(['neighbourhood', 'neighbourhood_group']).agg(neighbourhood_stats)
x = x.reset_index()

# agg() with lists of functions yields a two-level header; flatten it.
x.columns = ['neighbourhood', 'neighbourhood_group', 'Count', 'TotalPrice', 'AverPrice']

# Largest neighbourhoods first, with the index renumbered from 0.
x = x.sort_values('Count', ascending=False).reset_index(drop=True)
 

0. Introduction to the Data

I analyzed Airbnb listing data for Hawaii, including the price, the region, and the number of listings. I chose Hawaii because it is one of the most popular resort areas.

2. The number of Airbnb and the Price (the amount of Airbnb)

Maui has the highest average Airbnb price and the most Airbnb listings in Hawaii. Kauai also has high prices, but it has fewer Airbnb listings than the other areas. Kauai is an island paradise that is quieter than the other areas. It is also a smaller island, so it does not have many Airbnb listings. In contrast, Honolulu, which is the largest city in Hawaii, has lower prices but ranks second on the graph in the number of Airbnb listings.

# Per-group summary: number of rows plus total and average price.
group_stats = {'neighbourhood_group': ['count'], 'price': ['sum', 'mean']}
y = df.groupby(['neighbourhood_group']).agg(group_stats)
y = y.reset_index()

# Flatten the two-level header created by agg() into plain column names.
y.columns = ['neighbourhood_group', 'Count', 'TotalPrice', 'AverPrice']

def autolabel(these_bars, this_ax, place_of_decimals, symbol):
    """Write each bar's height as a text label just above the bar.

    Parameters
    ----------
    these_bars : iterable
        Bar objects providing ``get_x()``, ``get_width()`` and ``get_height()``.
    this_ax : object
        Axes-like object whose ``text()`` method draws the label.
    place_of_decimals : str
        Format specification applied to the height (e.g. ``','`` or ``'.2f'``).
    symbol : str
        Text placed in front of the formatted height (e.g. ``'$'``).
    """
    label_style = dict(fontsize=14, color='black', ha='center', va='bottom')
    for bar in these_bars:
        bar_height = bar.get_height()
        # Horizontal centre of the bar.
        center_x = bar.get_x() + bar.get_width() / 2
        caption = ''.join((symbol, format(bar_height, place_of_decimals)))
        # The label is anchored 1% above the top of the bar.
        this_ax.text(center_x, bar_height * 1.01, caption, **label_style)

# Grouped bar chart: for every row of y, the count is drawn against the left
# axis and the average price against a second y-axis on the right.
fig = plt.figure(figsize=(18, 10))
ax1 = fig.add_subplot(1, 1, 1)
ax2 = ax1.twinx()  # second y-axis sharing ax1's x-axis
bar_width = 0.4

# One x slot per row of y, so the chart follows the data instead of assuming
# exactly four groups.
x_pos = np.arange(len(y))
# The two bars of a group sit half a bar width either side of the slot centre.
count_bars = ax1.bar(x_pos-(0.5*bar_width), y.Count, bar_width, color='paleturquoise', edgecolor='black', label='The number of Airbnb')
aver_price_bars = ax2.bar(x_pos+(0.5*bar_width), y.AverPrice, bar_width, color='lightsalmon', edgecolor='black', label='Average Price')

ax1.set_xlabel('Cities', fontsize=18)
ax1.set_ylabel('The number of Airbnb', fontsize=18, labelpad=20)
ax2.set_ylabel('Average Price', fontsize=18, rotation=270, labelpad=20)
ax1.tick_params(axis='y', labelsize=14)
ax2.tick_params(axis='y', labelsize=14)

plt.title('The number of Airbnb and the Price in Hawaii', fontsize=18)
ax1.set_xticks(x_pos)
ax1.set_xticklabels(y.neighbourhood_group, fontsize=14, fontweight='bold')

# Merge the legend entries of both axes into a single legend drawn on ax1.
count_color, count_label = ax1.get_legend_handles_labels()
price_color, price_label = ax2.get_legend_handles_labels()
legend = ax1.legend(count_color + price_color, count_label + price_label, loc='upper left', frameon=True, ncol=1, shadow=True,
                   borderpad=1, fontsize=14)
# Extend both y-axes beyond the tallest bar (60% and 30% respectively).
ax1.set_ylim(0, y.Count.max()*1.60)
ax2.set_ylim(0, y.AverPrice.max()*1.30)
autolabel(count_bars, ax1, ',', '')
autolabel(aver_price_bars, ax2, '.2f', '$')

# Thousands separators on the left axis (StrMethodFormatter comes from
# matplotlib.ticker).
ax1.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))

plt.show()

3. The number of reviews by Area

This graph shows that Primary Urban Center has the highest number of reviews. It leads the second-place area, Kihei-Makena, by about 40,000 reviews. The ranking is a little different from the ranking of the most popular Airbnb areas. However, the popular Airbnb areas still have a lot of reviews.

# Total number of reviews for every (neighbourhood, neighbourhood_group) pair.
review_totals = df.groupby(['neighbourhood', 'neighbourhood_group']).agg({'number_of_reviews': ['sum']})
bar_df = review_totals.reset_index()
# Flatten the two-level header created by agg() into plain column names.
bar_df.columns = ['neighbourhood', 'neighbourhood_group', 'sum']

# Most-reviewed neighbourhoods first, with the index renumbered from 0.
z = bar_df.sort_values('sum', ascending=False).reset_index(drop=True)

import matplotlib.patches as mpatches

# Bar chart of total reviews per neighbourhood, coloured by neighbourhood group.
#
# One mapping drives both the bar colours and the legend, so a bar can never
# disagree with its legend entry.
group_colors = {
    'Hawaii': 'deepskyblue',
    'Honolulu': 'dimgrey',
    'Kauai': 'salmon',
    'Maui': 'cornflowerblue',
}
# Colours are looked up row by row from z (the frame that is plotted), so they
# stay aligned with the bars; a group missing from the mapping is drawn in
# light grey rather than raising.
my_colors1 = [group_colors.get(group, 'lightgrey') for group in z['neighbourhood_group']]
legend_handles = [mpatches.Patch(color=shade, label=group) for group, shade in group_colors.items()]

mean_reviews = z["sum"].mean()

fig = plt.figure(figsize=(18, 16))
fig.suptitle('The number of review in Hawaii',
             fontsize=18, fontweight='bold')

ax1 = fig.add_subplot(2, 1, 1)
ax1.bar(z["neighbourhood"], z["sum"], label='Total reviews amount', color=my_colors1)
ax1.legend(handles=legend_handles, fontsize=14)
# Dashed reference line at the mean number of reviews per neighbourhood.
ax1.axhline(mean_reviews, color='black', linestyle='dashed')
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.axes.xaxis.set_visible(True)
fig.autofmt_xdate(rotation=45)  # slant the neighbourhood names
# Caption for the mean line, anchored at x position 26, slightly above the line.
ax1.text(26, mean_reviews+5, 'Mean = ' + '{:,}'.format(mean_reviews), rotation=0, fontsize=14)

fig.subplots_adjust(hspace = 0.35)

# Thousands separators on the y-axis (StrMethodFormatter comes from
# matplotlib.ticker).
ax1.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))

plt.show()

4. The percentage of Airbnbs’ room types

Entire home/apt listings make up the highest percentage in all areas. This indicates that most hosts in Hawaii provide guests with their entire place, including a private entrance and no shared spaces. On the other hand, neither hotel rooms nor shared rooms are popular in Hawaii. Kauai in particular has no shared-room Airbnbs at all.

# Nested pie chart for Honolulu and Maui: the outer ring shows each group's
# share of listings, the inner ring splits the listings by room type.

# Number of listings for every (neighbourhood_group, room_type) pair.
pie_df = df.groupby(['neighbourhood_group', 'room_type']).agg({'room_type':['count']}).reset_index()
pie_df.columns = ['neighbourhood_group', 'room_type', 'count']

# Select the two groups by name. Positional row indices only work while every
# group contributes exactly the expected number of room types.
s = pie_df[pie_df['neighbourhood_group'].isin(['Honolulu', 'Maui'])].reset_index(drop=True)

# Colormap indices: even numbers go to the outer ring, the remaining numbers
# go to the inner ring.
number_outside_colors = len(s.neighbourhood_group.unique())
outside_color_ref_number = np.arange(number_outside_colors)*2

number_inside_colors = len(s["room_type"].unique())
all_color_ref_number = np.arange(number_outside_colors + number_inside_colors)

inside_color_ref_number = [each for each in all_color_ref_number
                           if each not in outside_color_ref_number]

fig = plt.figure(figsize=(9,9))
ax = fig.add_subplot(1, 1, 1)

colormap = plt.get_cmap("tab20c")
outer_colors = colormap(outside_color_ref_number)

all_count = s["count"].sum()

# Outer ring: one wedge per group, labelled with its percentage and the
# absolute count recovered from that percentage.
s.groupby(['neighbourhood_group'])['count'].sum().plot(
       kind='pie', radius=1.2, colors = outer_colors, pctdistance = 0.9, labeldistance =1.1,
       wedgeprops = dict(edgecolor='w'), textprops = {'fontsize':12},
       autopct = lambda p: '{:.2f}%\n({:,.0f})'.format(p,(p/100)*all_count),
       startangle=90, ax=ax)

# Inner ring: one wedge per row of s. The labels are taken from s itself so
# that each wedge is named after its own row.
inner_colors = colormap(inside_color_ref_number)
s["count"].plot(
       kind='pie', radius=0.95, colors = inner_colors, pctdistance = 1.07, labeldistance =0.5,
       wedgeprops = dict(edgecolor='w'), textprops = {'fontsize':7.9}, rotatelabels=True,
       labels = s["room_type"],
       autopct = '%1.1f%%',
       startangle=90, ax=ax)

# White disc in the middle turns the pie into a donut and hosts the total.
hole = plt.Circle((0,0), 0.3, fc='white')
ax.add_artist(hole)

ax.yaxis.set_visible(False)
plt.title('Total room types by Honolulu and Maui', fontsize=18)

ax.text(0, 0, 'Total room\n' + '{:,}'.format(all_count), size =16, ha='center', va='center')

ax.axis('equal')
plt.tight_layout()
plt.show()

# Nested pie chart for Hawaii and Kauai: the outer ring shows each group's
# share of listings, the inner ring splits the listings by room type.

# Select the two groups by name. Positional row indices only work while every
# group contributes exactly the expected number of room types.
r = pie_df[pie_df['neighbourhood_group'].isin(['Hawaii', 'Kauai'])].reset_index(drop=True)
fig = plt.figure(figsize=(9,9))
ax = fig.add_subplot(1, 1, 1)

colormap = plt.get_cmap("tab20c")
outer_colors = colormap(outside_color_ref_number)

all_count2 = r["count"].sum()

# Outer ring: one wedge per group, labelled with its percentage and the
# absolute count recovered from that percentage.
r.groupby(['neighbourhood_group'])['count'].sum().plot(
       kind='pie', radius=1, colors = outer_colors, pctdistance = 0.9, labeldistance =1.1,
       wedgeprops = dict(edgecolor='w'), textprops = {'fontsize':12},
       autopct = lambda p: '{:.2f}%\n({:,.0f})'.format(p,(p/100)*all_count2),
       startangle=90, ax=ax)

# Inner ring: one wedge per row of r. The labels are taken from r itself so
# that each wedge is named after its own row.
inner_colors = colormap(inside_color_ref_number)
r["count"].plot(
       kind='pie', radius=0.8, colors = inner_colors, pctdistance = 1.03, labeldistance =0.5,
       wedgeprops = dict(edgecolor='w'), textprops = {'fontsize':6.8}, rotatelabels=True,
       labels = r["room_type"],
       autopct = '%1.1f%%',
       startangle=90, ax=ax)

# White disc in the middle turns the pie into a donut and hosts the total.
hole = plt.Circle((0,0), 0.3, fc='white')
ax.add_artist(hole)

ax.yaxis.set_visible(False)
plt.title('Total room types by Hawaii and Kauai', fontsize=18)

ax.text(0, 0, 'Total room\n' + '{:,}'.format(all_count2), size =16, ha='center', va='center')

ax.axis('equal')
plt.tight_layout()
plt.show()

5. The map of Airbnb

This map shows the locations of the Airbnbs in Hawaii. In general, the Airbnbs are on the coast. The majority of the Airbnbs in the Maui area are concentrated on one island, which is the biggest island in the Maui area. In Hawaii, no Airbnbs are located in the center of the islands because of the mountains there. Also, there are more Airbnbs in the east coast area than in the west coast area, which is a famous luxury resort area.

[The areas on Map] Grey: Hawaii, Blue: Honolulu, Green: Kauai Area, Red: Maui

# Interactive map with one small circle per listing, coloured by neighbourhood
# group, saved as an HTML file.
center_of_map = [19.8968, -155.5828]

my_map = folium.Map(location = center_of_map,
                   zoom_start = 7.4,
                   width = '90%',
                   height = '100%',
                   left = '5%',
                   right = '5%',
                   top = '0%', tiles = 'openstreetmap')

#tiles = ['cartodbpositron', 'openstreetmap', 'stamenterrain', 'stamentoner']

# Circle colour per neighbourhood group. The dict order is the order in which
# the groups are added to the map: Maui, Honolulu, Kauai, Hawaii.
marker_colors = {
    'Maui': 'red',
    'Honolulu': 'blue',
    'Kauai': 'green',
    'Hawaii': 'grey',
}

for group_name, marker_color in marker_colors.items():
    listings = df.loc[df.neighbourhood_group == group_name,
                      ['latitude', 'longitude', 'neighbourhood']]
    # itertuples yields plain (lat, lon, name) tuples, avoiding a per-row
    # Series lookup for every field of every listing.
    for latitude, longitude, neighbourhood in listings.itertuples(index=False, name=None):
        folium.Circle(radius=10,
                      location=[latitude, longitude],
                      tooltip=str(neighbourhood),
                      color=marker_color, fill=True).add_to(my_map)

my_map.save(path + 'Hawaii.html')