import os
os.environ['QT_QPA_PLATFORM_PLUGIN_PATH'] = 'D:/Anaconda3/Library/plugins/platforms'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import folium
warnings.filterwarnings("ignore")
# Input CSV and the directory the generated map is written to.
filename = 'U:/listings.csv'
path = "U:/"

# Only the columns used by the charts and the map are loaded.
wanted_columns = ['neighbourhood_group', 'neighbourhood', 'room_type', 'price',
                  'latitude', 'longitude', 'minimum_nights', 'number_of_reviews']
df = pd.read_csv(filename, usecols=wanted_columns)

# One row per (neighbourhood, neighbourhood_group): listing count plus the
# total and mean price, ordered from the most to the fewest listings.
x = (
    df.groupby(['neighbourhood', 'neighbourhood_group'])
    .agg({'neighbourhood': ['count'], 'price': ['sum', 'mean']})
    .reset_index()
)
x.columns = ['neighbourhood', 'neighbourhood_group', 'Count', 'TotalPrice', 'AverPrice']
x = x.sort_values('Count', ascending=False).reset_index(drop=True)
I analyzed Airbnb listing information for Hawaii, such as price, region, and the number of listings. I chose Hawaii because it is one of the most popular resort areas.
There is an average of 776 Airbnb spots per area in Hawaii. Primary Urban Center is the most popular area in Hawaii: it leads the second-place area, Kihei-Makena, by about 1,000 spots. This is because Primary Urban Center includes the resort area around Waikiki, and Waikiki Beach is one of the most famous beaches. The graph also shows a notable gap not only between first and second place but also between third and fourth place.
def pick_colors_according_to_neighbourhood_group_count(this_data):
    """Return one bar colour per row, chosen by the row's neighbourhood group.

    Parameters
    ----------
    this_data : object with an iterable ``neighbourhood_group`` attribute
        The script passes pandas DataFrames.

    Returns
    -------
    list of str
        Matplotlib colour names, in the same order as
        ``this_data.neighbourhood_group``. 'Hawaii', 'Honolulu' and 'Kauai'
        have dedicated colours; any other value falls back to
        'cornflowerblue' (the colour the chart legends label as Maui).
    """
    group_colors = {
        'Hawaii': 'deepskyblue',
        'Honolulu': 'dimgrey',
        'Kauai': 'salmon',
    }
    return [group_colors.get(each, 'cornflowerblue')
            for each in this_data.neighbourhood_group]
import matplotlib.patches as mpatches
from matplotlib.ticker import FormatStrFormatter, StrMethodFormatter
# Horizontal bar chart of the number of listings per neighbourhood,
# coloured by neighbourhood group.
# Ascending order here puts the largest bar at the top of the barh plot.
d1 = x.sort_values('Count', ascending=True).reset_index(drop=True)
my_colors2 = pick_colors_according_to_neighbourhood_group_count(d1)
mean_count = d1.Count.mean()

# Legend entries use the same colours the colour-picking helper assigns.
legend_handles = [
    mpatches.Patch(color='deepskyblue', label='Hawaii'),
    mpatches.Patch(color='dimgrey', label='Honolulu'),
    mpatches.Patch(color='salmon', label='Kauai'),
    mpatches.Patch(color='cornflowerblue', label='Maui'),
]

fig = plt.figure(figsize=(18, 12))
ax1 = fig.add_subplot(1, 1, 1)
ax1.barh(d1.neighbourhood, d1.Count, color=my_colors2)

# Print each bar's value just to the right of the bar's end.
for row_counter, value_at_row_counter in enumerate(d1.Count):
    ax1.text(value_at_row_counter + 2, row_counter, '{:,}'.format(value_at_row_counter),
             color='dimgrey', size=12, fontweight='bold',
             ha='left', va='center', backgroundcolor='white')

ax1.legend(loc='lower right', handles=legend_handles, fontsize=14)

# Dashed reference line at the mean count; the label is rounded and given a
# thousands separator instead of printing the raw float.
plt.axvline(mean_count, color='black', linestyle='dashed')
ax1.text(mean_count + 3, 0, 'Mean = ' + '{:,.0f}'.format(mean_count), rotation=0, fontsize=14)

ax1.set_title('The popular Airbnb areas in Hawaii', size=20)
ax1.set_xlabel('The number of Airbnb', fontsize=16)
ax1.set_ylabel('Areas', fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
ax1.xaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
plt.show()
Maui has the highest average Airbnb price and the most Airbnb spots in Hawaii. Kauai also has high prices, but its number of Airbnb spots is lower than in other places. Kauai is an island paradise that is quieter than other places. Kauai is also a smaller island, so there are not many Airbnb spots. In contrast, Honolulu, which is the largest city in Hawaii, has cheaper prices, yet it is in second place on the graph for the number of Airbnb spots.
# One row per neighbourhood group: listing count plus total and mean price.
group_summary = df.groupby(['neighbourhood_group']).agg(
    {'neighbourhood_group': ['count'], 'price': ['sum', 'mean']}
)
y = group_summary.reset_index()
y.columns = ['neighbourhood_group', 'Count', 'TotalPrice', 'AverPrice']
def autolabel(these_bars, this_ax, place_of_decimals, symbol):
    """Write each bar's height as a text label just above the bar.

    Parameters
    ----------
    these_bars : iterable
        Bar objects exposing ``get_x()``, ``get_width()`` and ``get_height()``.
    this_ax : object
        Axes-like object whose ``text`` method draws the label.
    place_of_decimals : str
        Format spec applied to the height (the script passes ',' and '.2f').
    symbol : str
        Prefix placed in front of the formatted height (e.g. '$' or '').
    """
    for each_bar in these_bars:
        height = each_bar.get_height()
        # Centre the label over the bar and lift it 1% above the bar's top.
        this_ax.text(each_bar.get_x() + each_bar.get_width() / 2, height * 1.01,
                     symbol + format(height, place_of_decimals),
                     fontsize=14, color='black', ha='center', va='bottom')
# Grouped bar chart per neighbourhood group in `y`: listing count on the left
# axis and mean price on the right axis.
fig = plt.figure(figsize=(18, 10))
ax1 = fig.add_subplot(1, 1, 1)
ax2 = ax1.twinx()  # second y-axis sharing the same x-axis

bar_width = 0.4
# One slot per row of `y` rather than a hard-coded 4, so the bars and the
# tick labels stay aligned whatever the number of groups in the data.
x_pos = np.arange(len(y))

# The two bars of a group sit side by side around the group's tick.
count_bars = ax1.bar(x_pos - (0.5 * bar_width), y.Count, bar_width,
                     color='paleturquoise', edgecolor='black', label='The number of Airbnb')
aver_price_bars = ax2.bar(x_pos + (0.5 * bar_width), y.AverPrice, bar_width,
                          color='lightsalmon', edgecolor='black', label='Average Price')

ax1.set_xlabel('Cities', fontsize=18)
ax1.set_ylabel('The number of Airbnb', fontsize=18, labelpad=20)
ax2.set_ylabel('Average Price', fontsize=18, rotation=270, labelpad=20)
ax1.tick_params(axis='y', labelsize=14)
ax2.tick_params(axis='y', labelsize=14)
plt.title('The number of Airbnb and the Price in Hawaii', fontsize=18)
ax1.set_xticks(x_pos)
ax1.set_xticklabels(y.neighbourhood_group, fontsize=14, fontweight='bold')

# Merge the handles of both axes into a single legend drawn on ax1.
count_color, count_label = ax1.get_legend_handles_labels()
price_color, price_label = ax2.get_legend_handles_labels()
ax1.legend(count_color + price_color, count_label + price_label, loc='upper left',
           frameon=True, ncol=1, shadow=True, borderpad=1, fontsize=14)

# Extra headroom above the tallest bars for the legend and the value labels.
ax1.set_ylim(0, y.Count.max() * 1.60)
ax2.set_ylim(0, y.AverPrice.max() * 1.30)
autolabel(count_bars, ax1, ',', '')
autolabel(aver_price_bars, ax2, '.2f', '$')
ax1.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
plt.show()
This graph shows that Primary Urban Center has the highest number of reviews, leading the second-place area, Kihei-Makena, by about 40,000 reviews. The rankings are a little different from those of the popular Airbnb areas. However, the popular Airbnb areas still have a lot of reviews.
# Total number of reviews per (neighbourhood, neighbourhood_group).
review_totals = df.groupby(['neighbourhood', 'neighbourhood_group']).agg(
    {'number_of_reviews': ['sum']}
)
bar_df = review_totals.reset_index()
bar_df.columns = ['neighbourhood', 'neighbourhood_group', 'sum']

# Same data ordered from the most to the fewest reviews.
z = bar_df.sort_values('sum', ascending=False).reset_index(drop=True)
import matplotlib.patches as mpatches
# Bar chart of total reviews per neighbourhood, coloured by neighbourhood group.
# The colours are derived from `z`, the frame whose rows are actually plotted,
# so every bar gets the colour of its own group (`x` is ordered by listing
# count and would pair bars with the wrong groups).
my_colors1 = pick_colors_according_to_neighbourhood_group_count(z)
review_mean = z["sum"].mean()

# Legend entries use the same colours the colour-picking helper assigns.
legend_handles = [
    mpatches.Patch(color='deepskyblue', label='Hawaii'),
    mpatches.Patch(color='dimgrey', label='Honolulu'),
    mpatches.Patch(color='salmon', label='Kauai'),
    mpatches.Patch(color='cornflowerblue', label='Maui'),
]

fig = plt.figure(figsize=(18, 16))
fig.suptitle('The number of review in Hawaii',
             fontsize=18, fontweight='bold')
ax1 = fig.add_subplot(2, 1, 1)
ax1.bar(z["neighbourhood"], z["sum"], label='Total reviews amount', color=my_colors1)
ax1.legend(handles=legend_handles, fontsize=14)

# Dashed reference line at the mean review total.
plt.axhline(review_mean, color='black', linestyle='dashed')
ax1.spines['right'].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.axes.xaxis.set_visible(True)
fig.autofmt_xdate(rotation=45)  # rotate the neighbourhood names on the x-axis

# Mean label, rounded and with a thousands separator instead of the raw float.
ax1.text(26, review_mean + 5, 'Mean = ' + '{:,.0f}'.format(review_mean), rotation=0, fontsize=14)
fig.subplots_adjust(hspace=0.35)
ax1.yaxis.set_major_formatter(StrMethodFormatter('{x:,.0f}'))
plt.show()
Entire homes/apartments make up the highest percentage of listings in all areas. This indicates that most hosts in Hawaii provide guests with their entire place, including a private entrance and no shared spaces. On the other hand, neither hotel rooms nor shared rooms are popular in Hawaii. Kauai in particular has no shared-room Airbnbs.
# Number of listings per (neighbourhood_group, room_type).
pie_df = df.groupby(['neighbourhood_group', 'room_type']).agg({'room_type': ['count']}).reset_index()
pie_df.columns = ['neighbourhood_group', 'room_type', 'count']

# Rows for the Honolulu/Maui chart, selected by group name rather than by
# hard-coded row positions so the selection stays correct if a group gains
# or loses a room type.
s = pie_df[pie_df['neighbourhood_group'].isin(['Honolulu', 'Maui'])].reset_index(drop=True)

# Colormap indices: the outer ring (groups) takes every second index
# starting at 0; the inner ring (room types) takes the remaining indices
# below number_outside_colors + number_inside_colors.
number_outside_colors = len(s.neighbourhood_group.unique())
outside_color_ref_number = np.arange(number_outside_colors) * 2
number_inside_colors = len(s["room_type"].unique())
all_color_ref_number = np.arange(number_outside_colors + number_inside_colors)
inside_color_ref_number = [each for each in all_color_ref_number
                           if each not in outside_color_ref_number]
# Nested pie chart for the rows in `s`: the outer ring is the listing count
# per neighbourhood group, the inner ring is the count per room type.
fig = plt.figure(figsize=(9, 9))
ax = fig.add_subplot(1, 1, 1)
colormap = plt.get_cmap("tab20c")
all_count = s["count"].sum()

# Outer ring: one wedge per group, labelled with its share and absolute count.
outer_colors = colormap(outside_color_ref_number)
s.groupby(['neighbourhood_group'])['count'].sum().plot(
    kind='pie', radius=1.2, colors=outer_colors, pctdistance=0.9, labeldistance=1.1,
    wedgeprops=dict(edgecolor='w'), textprops={'fontsize': 12},
    autopct=lambda p: '{:.2f}%\n({:,.0f})'.format(p, (p / 100) * all_count),
    startangle=90)

# Inner ring: one wedge per row of `s`. The labels are taken from `s` itself
# so each wedge is named after the row it represents.
inner_colors = colormap(inside_color_ref_number)
s["count"].plot(
    kind='pie', radius=0.95, colors=inner_colors, pctdistance=1.07, labeldistance=0.5,
    wedgeprops=dict(edgecolor='w'), textprops={'fontsize': 7.9}, rotatelabels=True,
    labels=s["room_type"].tolist(),
    autopct='%1.1f%%',
    startangle=90)

# White disc in the middle turns the pie into a donut with room for the total.
hole = plt.Circle((0, 0), 0.3, fc='white')
ax.add_artist(hole)
ax.yaxis.set_visible(False)
plt.title('Total room types by Honolulu and Maui', fontsize=18)
ax.text(0, 0, 'Total room\n' + str('{:,}'.format(all_count)), size=16, ha='center', va='center')
ax.axis('equal')
plt.tight_layout()
plt.show()
# Rows for the Hawaii/Kauai chart, selected by group name rather than by
# hard-coded row positions.
r = pie_df[pie_df['neighbourhood_group'].isin(['Hawaii', 'Kauai'])].reset_index(drop=True)

# Nested pie chart for the rows in `r`: the outer ring is the listing count
# per neighbourhood group, the inner ring is the count per room type.
# outside_color_ref_number and inside_color_ref_number are the colormap
# indices computed earlier in the script for the Honolulu/Maui chart.
fig = plt.figure(figsize=(9, 9))
ax = fig.add_subplot(1, 1, 1)
colormap = plt.get_cmap("tab20c")
all_count2 = r["count"].sum()

# Outer ring: one wedge per group, labelled with its share and absolute count.
outer_colors = colormap(outside_color_ref_number)
r.groupby(['neighbourhood_group'])['count'].sum().plot(
    kind='pie', radius=1, colors=outer_colors, pctdistance=0.9, labeldistance=1.1,
    wedgeprops=dict(edgecolor='w'), textprops={'fontsize': 12},
    autopct=lambda p: '{:.2f}%\n({:,.0f})'.format(p, (p / 100) * all_count2),
    startangle=90)

# Inner ring: one wedge per row of `r`. The labels are taken from `r` itself
# so each wedge is named after the row it represents.
inner_colors = colormap(inside_color_ref_number)
r["count"].plot(
    kind='pie', radius=0.8, colors=inner_colors, pctdistance=1.03, labeldistance=0.5,
    wedgeprops=dict(edgecolor='w'), textprops={'fontsize': 6.8}, rotatelabels=True,
    labels=r["room_type"].tolist(),
    autopct='%1.1f%%',
    startangle=90)

# White disc in the middle turns the pie into a donut with room for the total.
hole = plt.Circle((0, 0), 0.3, fc='white')
ax.add_artist(hole)
ax.yaxis.set_visible(False)
plt.title('Total room types by Hawaii and Kauai', fontsize=18)
ax.text(0, 0, 'Total room\n' + str('{:,}'.format(all_count2)), size=16, ha='center', va='center')
ax.axis('equal')
plt.tight_layout()
plt.show()
This map shows the locations of the Airbnbs in Hawaii. In general, the Airbnbs are on the coast. The majority of the Airbnbs in the Maui area are concentrated on one island, which is the biggest island in Maui. In Hawaii, no Airbnbs are located in the center of the islands because there are mountains. Also, there are more Airbnbs in the east coast area than in the west coast area, which is a famous luxury resort area.
[Areas on the map] Grey: Hawaii, Blue: Honolulu, Green: Kauai, Red: Maui
# Interactive map with one small circle per listing, coloured by
# neighbourhood group, saved as an HTML file under `path`.
center_of_map = [19.8968, -155.5828]
my_map = folium.Map(location=center_of_map,
                    zoom_start=7.4,
                    width='90%',
                    height='100%',
                    left='5%',
                    right='5%',
                    top='0%', tiles='openstreetmap')
# Other tile sets tried: 'cartodbpositron', 'openstreetmap', 'stamenterrain', 'stamentoner'

# Listings of each neighbourhood group.
maui = df[df.neighbourhood_group == 'Maui']
honolulu = df[df.neighbourhood_group == 'Honolulu']
kauai = df[df.neighbourhood_group == 'Kauai']
hawaii = df[df.neighbourhood_group == 'Hawaii']

# One pass per group; the columns are zipped so each row is read once
# instead of doing three positional .iloc lookups per listing.
for listings, circle_color in ((maui, "red"), (honolulu, "blue"),
                               (kauai, "green"), (hawaii, "grey")):
    for latitude, longitude, neighbourhood in zip(listings['latitude'],
                                                  listings['longitude'],
                                                  listings['neighbourhood']):
        folium.Circle(radius=10,
                      location=[latitude, longitude],
                      tooltip=str(neighbourhood),
                      color=circle_color, fill=True).add_to(my_map)

my_map.save(path + 'Hawaii.html')