# Announce where the dataset used by this script was obtained.
_data_source_note = "Data source: https://data.baltimorecity.gov/Public-Safety/911-Police-Calls-for-Service/xviu-ezkt"
print(_data_source_note)
## Data source: https://data.baltimorecity.gov/Public-Safety/911-Police-Calls-for-Service/xviu-ezkt
def autolabel(rects,p,ax,symbol):
    """Annotate each bar with its height, written just above the bar.

    Args:
        rects: Iterable of bar objects exposing ``get_height()``,
            ``get_x()`` and ``get_width()``.
        p: Format spec passed to ``format()`` for the height (e.g. ``".1f"``).
        ax: Object whose ``text(x, y, s, **kwargs)`` method draws the label.
        symbol: String prepended to the formatted height.
    """
    for bar in rects:
        height = bar.get_height()
        # Horizontal centre of the bar.
        x_center = bar.get_x() + bar.get_width() / 2
        caption = symbol + format(height, p)
        # Place the label 1% above the bar top so it does not touch the bar.
        ax.text(
            x_center,
            height * 1.01,
            caption,
            fontsize=10,
            ha='center',
            color='black',
            va='bottom',
        )

def generateBaseMap(default_location=[39.2858, -76.6206], default_zoom_start=11):
    """Create a folium map with a scale control.

    Args:
        default_location: Coordinates passed to ``folium.Map`` as ``location``.
        default_zoom_start: Initial zoom level passed as ``zoom_start``.

    Returns:
        The newly created ``folium.Map``.
    """
    # The default list is only read, never mutated, so sharing it is harmless.
    return folium.Map(
        location=default_location,
        zoom_start=default_zoom_start,
        control_scale=True,
    )
# Record the interpreter version next to the results.
print("Python System Version:", sys.version, sep="")
## Python System Version:3.7.4 (tags/v3.7.4:e09359112e, Jul  8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]
input_file = 'C:/work/DataScience/GB736/assignment-04/911_Police_Calls_for_Service.csv'

# Plot styling shared by the charts in this script.
sns.set(style="darkgrid")
my_type_colors= sns.color_palette("muted")

## Reading input csv file into dataframe
# Only the first `rows` lines are loaded; drop `nrows` to read the whole file.
start_time = time.time()
rows = 100000
df0 = pd.read_csv(input_file, nrows=rows)
elapsed_seconds = time.time() - start_time
print("Reading took %s seconds to read %d lines." % (elapsed_seconds, df0.shape[0]))
## Reading took 1.2689998149871826 seconds to read 100000 lines.
print("Data shape:", df0.shape, sep="")
## Data shape:(100000, 20)

Data manipulation

# Clean the raw call records and derive location and calendar columns.

# Keep only rows with a zip code, then treat the literal string 'None' as
# missing and drop every row that still has a missing value in any column.
df0 = df0[df0['ZipCode'].notna()]
df0 = df0.replace(to_replace='None', value=np.nan).dropna()
df0.head(2)
##     RecordID  CallNumber  ... 2010 Census Wards Precincts Zip Codes
## 10   7473387  P200581839  ...                        38.0   27937.0
## 12   7473302  P200581709  ...                        39.0   27301.0
## 
## [2 rows x 20 columns]

# Location is split on newlines into three parts; the last part is a
# parenthesised, comma-separated coordinate pair.
df0[["Street","City","Loc"]] = df0.Location.str.split("\n", expand=True)
df0['Loc'] = df0['Loc'].astype(str).str.strip("(").str.strip(")")
# NOTE(review): the first coordinate is stored as 'long' and the second as
# 'lat'. generateBaseMap's default location is [39.2858, -76.6206], so if the
# source pair has the same order these names are swapped -- confirm against
# the data and downstream users before renaming.
df0[['long','lat']] = df0.Loc.str.split(", ", expand=True)

# Cast the code columns to int first and then to str -- presumably so that
# float-typed codes lose their trailing ".0" before becoming labels.
convert_dict = {'ZipCode': int,'CouncilDistrict':int,'PolicePost':int}
df0 = df0.astype(convert_dict)
convert_dict = {'ZipCode': str,'CouncilDistrict':str,'PolicePost':str}
df0 = df0.astype(convert_dict)

# Parse the timestamp once for the whole column. The 12-hour directive %I is
# required for %p (AM/PM) to take effect; with %H the AM/PM marker is ignored.
call_ts = pd.to_datetime(df0['CallDateTime'], format="%m/%d/%Y %I:%M:%S %p")
df0['CallDate']    = call_ts.dt.date  # date objects (previously the bound method was stored)
df0['CallYear']    = call_ts.dt.year
df0['CallMonth']   = call_ts.dt.month
df0['CallDay']     = call_ts.dt.day
df0['CallWeekday'] = call_ts.dt.day_name()
df0['CallHour']    = call_ts.dt.hour
df0['CallMin']     = call_ts.dt.minute
df0['CallSec']     = call_ts.dt.second

# The raw location columns are no longer needed once they have been split.
df0=df0.drop(['Loc','Location'],axis=1)
# frac=1.0 samples every row, i.e. df is a shuffled copy of df0.
df = df0.sample(frac =1.0)

Plotting Pie Chart of 911 call distribution by Call Priority

# Pie chart of the number of calls per Priority value.

# Preview some rows whose parsed hour is greater than 12.
df0[df0.CallHour>12].head(10)
##      RecordID  CallNumber            CallDateTime  ... CallHour CallMin CallSec
## 193   7473935  P200582467  02/27/2020 02:01:00 PM  ...       14       1       0
## 210   7473878  P200582420  02/27/2020 01:44:00 PM  ...       13      44       0
## 244   7473950  P200582468  02/27/2020 02:00:00 PM  ...       14       0       0
## 245   7473960  P200582391  02/27/2020 01:37:00 PM  ...       13      37       0
## 247   7473900  P200582370  02/27/2020 01:33:00 PM  ...       13      33       0
## 267   7473989  P200582524  02/27/2020 02:17:00 PM  ...       14      17       0
## 271   7473931  P200582373  02/27/2020 01:34:00 PM  ...       13      34       0
## 274   7473818  P200582327  02/27/2020 01:18:00 PM  ...       13      18       0
## 293   7473972  P200582482  02/27/2020 02:05:00 PM  ...       14       5       0
## 339   7473938  P200582443  02/27/2020 01:55:00 PM  ...       13      55       0
## 
## [10 rows x 31 columns]

# Count records per priority and name the count column 'CallCount'.
dfp1 = df.groupby(['Priority'],as_index=False).agg({'RecordID':'count'})
dfp1.rename(columns={'RecordID':'CallCount'},inplace=True)
# Display only: the sorted frame is not assigned, so dfp1 keeps group order.
dfp1.sort_values(by=['CallCount'], ascending=False)
##         Priority  CallCount
## 3  Non-Emergency       5448
## 2         Medium        660
## 1            Low        396
## 0           High        180
dfp1.head(10)
##         Priority  CallCount
## 0           High        180
## 1            Low        396
## 2         Medium        660
## 3  Non-Emergency       5448

matplotlib.style.use('ggplot')
fig = plt.figure(figsize=(8,8))
labels = dfp1.Priority
mydata = dfp1.CallCount
# `colors` and `explode` are not passed to plt.pie below (the seaborn palette
# my_type_colors is used instead); they are kept in case later code reads them.
colors = ['red','green','blue','purple','orange']
# Builtin float replaces np.float, an alias removed from NumPy in 1.24.
explode = np.arange(dfp1.Priority.count(), dtype=float)

patches,texts,autotexts = plt.pie(mydata,labels=labels,colors=my_type_colors,autopct='%1.1f%%',shadow=True, startangle= 160)
# Enlarge the wedge labels and draw the percentage labels large and white.
for wedge_label in texts:
    wedge_label.set_fontsize(18)
for pct_label in autotexts:
    pct_label.set_fontsize(18)
    pct_label.set_color('white')
# The first wedge gets a black percentage and a slightly larger label.
autotexts[0].set_color('Black')
texts[0].set_fontsize(20)
# Equal axis scaling so the pie is drawn as a circle.
plt.axis('equal')
## (-1.1006644869279618, 1.1058262695886127, -1.1061446313786636, 1.1050543291946804)
plt.title('911 Calls distribution by Call Priority',fontsize=20,color='Blue')
plt.show()