from IPython.display import HTML
# Markup shown at the top of the notebook: a script that hides every
# `div.input` element once the document is ready (code_show starts out true,
# so the first code_toggle() call hides them) plus a link that calls
# code_toggle() again to flip the visibility.
toggle_markup = '''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.'''
HTML(toggle_markup)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Statement order is kept as-is: the seaborn 'whitegrid' style is set first,
# then matplotlib's 'ggplot' style sheet is applied after it.
sns.set_style(style='whitegrid')
plt.style.use('ggplot')
from mpl_toolkits.basemap import Basemap
from mpl_toolkits.basemap import Basemap
# Load the 911 call records from '911.csv' (path is relative to the working
# directory) into the DataFrame used by the rest of the notebook.
df = pd.read_csv('911.csv')
# Print the column names, non-null counts and dtypes as a quick sanity check.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 99492 entries, 0 to 99491 Data columns (total 9 columns): lat 99492 non-null float64 lng 99492 non-null float64 desc 99492 non-null object zip 86637 non-null float64 title 99492 non-null object timeStamp 99492 non-null object twp 99449 non-null object addr 98973 non-null object e 99492 non-null int64 dtypes: float64(3), int64(1), object(5) memory usage: 6.8+ MB
# Preview the first rows of the raw data.
df.head()
| lat | lng | desc | zip | title | timeStamp | twp | addr | e | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 40.297876 | -75.581294 | REINDEER CT & DEAD END; NEW HANOVER; Station ... | 19525.0 | EMS: BACK PAINS/INJURY | 2015-12-10 17:40:00 | NEW HANOVER | REINDEER CT & DEAD END | 1 |
| 1 | 40.258061 | -75.264680 | BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... | 19446.0 | EMS: DIABETIC EMERGENCY | 2015-12-10 17:40:00 | HATFIELD TOWNSHIP | BRIAR PATH & WHITEMARSH LN | 1 |
| 2 | 40.121182 | -75.351975 | HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... | 19401.0 | Fire: GAS-ODOR/LEAK | 2015-12-10 17:40:00 | NORRISTOWN | HAWS AVE | 1 |
| 3 | 40.116153 | -75.343513 | AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... | 19401.0 | EMS: CARDIAC EMERGENCY | 2015-12-10 17:40:01 | NORRISTOWN | AIRY ST & SWEDE ST | 1 |
| 4 | 40.251492 | -75.603350 | CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... | NaN | EMS: DIZZINESS | 2015-12-10 17:40:01 | LOWER POTTSGROVE | CHERRYWOOD CT & DEAD END | 1 |
# Five ZIP codes with the most calls (value_counts orders by frequency).
df['zip'].value_counts().head(5)
19401.0 6979 19464.0 6643 19403.0 4854 19446.0 4748 19406.0 3174 Name: zip, dtype: int64
# Five townships with the most calls (value_counts orders by frequency).
df['twp'].value_counts().head(5)
LOWER MERION 8443 ABINGTON 5977 NORRISTOWN 5890 UPPER MERION 5227 CHELTENHAM 4575 Name: twp, dtype: int64
# Titles have the form "<category>: <detail>" (e.g. "EMS: BACK PAINS/INJURY");
# keep only the text in front of the first colon as the call's Reason.
df['Reason'] = df['title'].map(lambda text: text.partition(':')[0])
# Number of calls per Reason category.
df['Reason'].value_counts()
EMS 48877 Traffic 35695 Fire 14920 Name: Reason, dtype: int64
# Bar chart with one bar per Reason category, height = number of calls.
plt.figure(figsize=(8, 5))
reason_ax = sns.countplot(data=df, x='Reason', palette='viridis')
reason_ax.set_title('Different Reasons for 911 Calls')
Text(0.5,1,'Different Reasons for 911 Calls')
# Parse the timestamp strings into datetimes once, then derive the calendar
# features through the vectorized `.dt` accessor rather than invoking a
# Python lambda on every row.
df['timeStamp'] = pd.to_datetime(df['timeStamp'])
df['Hour'] = df['timeStamp'].dt.hour
df['Month'] = df['timeStamp'].dt.month

# `.dt.dayofweek` numbers the days 0 (Monday) through 6 (Sunday); translate
# those numbers directly into short labels so 'Day of Week' is written once.
dmap = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}
df['Day of Week'] = df['timeStamp'].dt.dayofweek.map(dmap)
# Left-to-right order of the weekday bars (week starting on Sunday).
order = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']

# Total number of calls for each day of the week.
plt.figure(figsize=(8, 5))
weekday_ax = sns.countplot(
    data=df, x='Day of Week', order=order, palette='viridis'
)
weekday_ax.set_title('The Number of the 911 Calls in Days of a Week')
plt.tight_layout()
# Calls per weekday, split into one bar per Reason category.
plt.figure(figsize=(8, 5))
weekday_reason_ax = sns.countplot(
    data=df, x='Day of Week', hue='Reason', order=order, palette='viridis'
)
# Anchor the legend just outside the right edge of the axes so it does not
# cover the bars.
weekday_reason_ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
weekday_reason_ax.set_title(
    'The Number of the 911 Calls for Different Reasons in a Week'
)
plt.tight_layout()
# Total number of calls for each month present in the data.
_, month_ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x='Month', palette='viridis', ax=month_ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a0ceb6080>
# Calls per month, split into one bar per Reason category.
plt.figure(figsize=(8, 5))
month_reason_ax = sns.countplot(
    data=df, x='Month', hue='Reason', palette='viridis'
)
# Anchor the legend just outside the right edge of the axes.
month_reason_ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
<matplotlib.legend.Legend at 0x1a0cd15668>
# Per-month count of non-null entries in every column.
byMonth = df.groupby(by='Month').count()

# Line plot of the monthly count, using the 'twp' column as the measure.
monthly_calls = byMonth['twp']
monthly_calls.plot(color='blue')
<matplotlib.axes._subplots.AxesSubplot at 0x1a0cd03048>
# lmplot needs 'Month' as an ordinary column, so move it out of the index
# before fitting and drawing the linear regression of count against month.
month_totals = byMonth.reset_index()
sns.lmplot(data=month_totals, x='Month', y='twp')
<seaborn.axisgrid.FacetGrid at 0x1a0cccc080>
# Count calls for every (weekday, hour) pair, then pivot the hours into
# columns so each row is a weekday and each column an hour of the day.
calls_by_day_hour = df.groupby(['Day of Week', 'Hour'])['Reason'].count()
dayHour = calls_by_day_hour.unstack()
dayHour
| Hour | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Day of Week | |||||||||||||||||||||
| Fri | 275 | 235 | 191 | 175 | 201 | 194 | 372 | 598 | 742 | 752 | ... | 932 | 980 | 1039 | 980 | 820 | 696 | 667 | 559 | 514 | 474 |
| Mon | 282 | 221 | 201 | 194 | 204 | 267 | 397 | 653 | 819 | 786 | ... | 869 | 913 | 989 | 997 | 885 | 746 | 613 | 497 | 472 | 325 |
| Sat | 375 | 301 | 263 | 260 | 224 | 231 | 257 | 391 | 459 | 640 | ... | 789 | 796 | 848 | 757 | 778 | 696 | 628 | 572 | 506 | 467 |
| Sun | 383 | 306 | 286 | 268 | 242 | 240 | 300 | 402 | 483 | 620 | ... | 684 | 691 | 663 | 714 | 670 | 655 | 537 | 461 | 415 | 330 |
| Thu | 278 | 202 | 233 | 159 | 182 | 203 | 362 | 570 | 777 | 828 | ... | 876 | 969 | 935 | 1013 | 810 | 698 | 617 | 553 | 424 | 354 |
| Tue | 269 | 240 | 186 | 170 | 209 | 239 | 415 | 655 | 889 | 880 | ... | 943 | 938 | 1026 | 1019 | 905 | 731 | 647 | 571 | 462 | 274 |
| Wed | 250 | 216 | 189 | 209 | 156 | 255 | 410 | 701 | 875 | 808 | ... | 904 | 867 | 990 | 1037 | 894 | 686 | 668 | 575 | 490 | 335 |
7 rows × 24 columns
# Heatmap of call volume: weekday rows against hour-of-day columns.
_, day_hour_ax = plt.subplots(figsize=(10, 5))
sns.heatmap(dayHour, cmap='Blues', ax=day_hour_ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a0f4ecb38>
# Hierarchically clustered heatmap of the weekday-by-hour call counts.
# clustermap creates its own figure, so the size must be passed through its
# `figsize` argument; a separate plt.figure() call beforehand would only
# leave behind an unused empty figure.
sns.clustermap(dayHour, cmap='Blues', figsize=(6, 3))
<seaborn.matrix.ClusterGrid at 0x1a1ddf9780>
<Figure size 432x216 with 0 Axes>
# Count calls for every (weekday, month) pair, then pivot the months into
# columns so each row is a weekday and each column a month.
calls_by_day_month = df.groupby(['Day of Week', 'Month'])['Reason'].count()
dayMonth = calls_by_day_month.unstack()
dayMonth
| Month | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 |
|---|---|---|---|---|---|---|---|---|---|
| Day of Week | |||||||||
| Fri | 1970 | 1581 | 1525 | 1958 | 1730 | 1649 | 2045 | 1310 | 1065 |
| Mon | 1727 | 1964 | 1535 | 1598 | 1779 | 1617 | 1692 | 1511 | 1257 |
| Sat | 2291 | 1441 | 1266 | 1734 | 1444 | 1388 | 1695 | 1099 | 978 |
| Sun | 1960 | 1229 | 1102 | 1488 | 1424 | 1333 | 1672 | 1021 | 907 |
| Thu | 1584 | 1596 | 1900 | 1601 | 1590 | 2065 | 1646 | 1230 | 1266 |
| Tue | 1973 | 1753 | 1884 | 1430 | 1918 | 1676 | 1670 | 1612 | 1234 |
| Wed | 1700 | 1903 | 1889 | 1517 | 1538 | 2058 | 1717 | 1295 | 1262 |
# Heatmap of call volume: weekday rows against month columns.
_, day_month_ax = plt.subplots(figsize=(10, 5))
sns.heatmap(dayMonth, cmap='Oranges', ax=day_month_ax)
<matplotlib.axes._subplots.AxesSubplot at 0x1a0cedfdd8>
# Hierarchically clustered heatmap of the weekday-by-month call counts.
# clustermap creates its own figure, so the size must be passed through its
# `figsize` argument; a separate plt.figure() call beforehand would only
# leave behind an unused empty figure.
sns.clustermap(dayMonth, cmap='Oranges', figsize=(8, 4))
<seaborn.matrix.ClusterGrid at 0x1a19f88eb8>
<Figure size 576x288 with 0 Axes>
# Latitude and longitude of every call as plain NumPy arrays (one entry per row).
lat, lon = df['lat'].values, df['lng'].values

# Number of calls recorded at each distinct (lat, lng) pair.
# NOTE(review): this array has one entry per unique coordinate pair, so it is
# generally shorter than `lat`/`lon` above — confirm how the code that
# consumes these three arrays lines them up.
bygeo = df.groupby(['lat', 'lng'])['Reason'].count().values