Python-Assignment-01 Analysis & Visualization of 911 Police calls

Data manipulation
Plotting Pie Chart of 911 call distribution by Call Priority

# Echo the dataset's public source URL so it is shown in the rendered output.
print("Data source: https://data.baltimorecity.gov/Public-Safety/911-Police-Calls-for-Service/xviu-ezkt")

## Data source: https://data.baltimorecity.gov/Public-Safety/911-Police-Calls-for-Service/xviu-ezkt

def autolabel(rects,p,ax,symbol):
    """Write each bar's height as a text label positioned at the bar's top.

    Args:
        rects: Iterable of bar objects exposing ``get_height``, ``get_x``
            and ``get_width``.
        p: Format spec handed to ``format`` for the height (e.g. ``".1f"``).
        ax: Object whose ``text`` method draws the label.
        symbol: String prepended to the formatted height.
    """
    label_style = dict(fontsize=10, ha='center', color='black', va='bottom')
    for bar in rects:
        height = bar.get_height()
        center_x = bar.get_x()+bar.get_width()/2
        # Anchor the label at 101% of the bar height, horizontally centred.
        ax.text(center_x, height*1.01, symbol+format(height, p), **label_style)

def generateBaseMap(default_location=(39.2858, -76.6206), default_zoom_start=11):
    """Build the folium map used as the base layer for later overlays.

    Args:
        default_location: Pair of coordinates the map is centred on, passed
            straight to ``folium.Map(location=...)``. The default is an
            immutable tuple (instead of a shared mutable list) so the default
            can never be altered between calls.
            NOTE(review): the default pair looks like (lat, long) for the
            Baltimore area -- confirm.
        default_zoom_start: Initial zoom level, passed as ``zoom_start``.

    Returns:
        The ``folium.Map`` instance, created with the scale control enabled.
    """
    return folium.Map(
        location=default_location,
        control_scale=True,
        zoom_start=default_zoom_start,
    )

# Record the interpreter version the analysis was run with.
print("Python System Version:", sys.version, sep="")

## Python System Version:3.7.4 (tags/v3.7.4:e09359112e, Jul  8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]

# Plot styling: seaborn's "darkgrid" theme, plus its "muted" palette kept for the charts.
sns.set(style="darkgrid")
my_type_colors = sns.color_palette(palette="muted")

# Path of the 911 calls CSV export that is loaded into the dataframe.
input_file = 'C:/work/DataScience/GB736/assignment-04/911_Police_Calls_for_Service.csv'

## Reading input csv file into dataframe
# Only the first `rows` lines of the CSV are loaded to keep the run short;
# call pd.read_csv(input_file) without nrows to load the whole file.
rows = 100000
start_time = time.time()
df0 = pd.read_csv(input_file, nrows=rows)
elapsed_seconds = time.time() - start_time
print("Reading took %s seconds to read %d lines." % (elapsed_seconds, df0.shape[0]))

## Reading took 1.2689998149871826 seconds to read 100000 lines.

# Report (rows, columns) of the freshly loaded dataframe.
print("Data shape:", df0.shape, sep="")

## Data shape:(100000, 20)

Data manipulation

Filter and consider rows where ZipCode is not NA
Replace None with NA and drop all NA
Break location column into Street, City and Location (Long & Lat)
Convert data types to int and string
Break CallDateTime into Call Year, Month, Day of week, Hour, Minute and Seconds
Remove Loc and Location columns after breaking them into separate columns (CallDateTime is kept alongside its derived columns)
Take a random sample of the rows for initial analysis (the sampling fraction is currently 100%, i.e. all rows in shuffled order)

# Keep only rows that have a ZipCode.
df0 = df0.dropna(subset=['ZipCode'])
# The literal string 'None' is treated as missing, then every row with any
# missing value is discarded.
df0 = df0.replace('None', np.nan)
df0 = df0.dropna()
df0.head(2)

##     RecordID  CallNumber  ... 2010 Census Wards Precincts Zip Codes
## 10   7473387  P200581839  ...                        38.0   27937.0
## 12   7473302  P200581709  ...                        39.0   27301.0
## 
## [2 rows x 20 columns]

# Location is split on newlines into three parts: street, city and a
# parenthesised coordinate pair.
location_parts = df0["Location"].str.split("\n", expand=True)
df0[["Street","City","Loc"]] = location_parts
# Trim the surrounding parentheses from the coordinate text.
df0['Loc'] = df0['Loc'].apply(lambda text: str(text).strip("(").strip(")"))
# NOTE(review): the first coordinate is stored as 'long' and the second as
# 'lat'; confirm this matches the order used in the source data.
df0[['long','lat']] = df0['Loc'].str.split(", ", expand=True)

# Cast the code-like columns to int first (dropping any float formatting such
# as a trailing ".0") and then to str so they behave as labels.
id_columns = ('ZipCode', 'CouncilDistrict', 'PolicePost')
convert_dict = {column: int for column in id_columns}
df0 = df0.astype(convert_dict)
convert_dict = {column: str for column in id_columns}
df0 = df0.astype(convert_dict)

# Parse CallDateTime once, as a 12-hour clock with an AM/PM marker
# (%I ... %p), and derive every calendar field from that single parsed
# Series. Parsing with %H would silently ignore the AM/PM marker.
call_ts = pd.to_datetime(df0['CallDateTime'], format="%m/%d/%Y %I:%M:%S %p")

df0['CallDate']    = call_ts.dt.date        # datetime.date objects, not bound methods
df0['CallYear']    = call_ts.dt.year
df0['CallMonth']   = call_ts.dt.month
df0['CallDay']     = call_ts.dt.day
df0['CallWeekday'] = call_ts.dt.day_name()  # English weekday names, e.g. "Thursday"
df0['CallHour']    = call_ts.dt.hour        # 0-23 after applying AM/PM
df0['CallMin']     = call_ts.dt.minute
df0['CallSec']     = call_ts.dt.second

# The raw location fields are no longer needed once split into columns.
df0 = df0.drop(columns=['Loc', 'Location'])
# frac=1.0 keeps every row; sample() returns them in random order.
df = df0.sample(frac=1.0)

Plotting Pie Chart of 911 call distribution by Call Priority

# Spot-check: first ten rows whose derived CallHour is greater than 12.
df0.loc[df0['CallHour'] > 12].head(10)

##      RecordID  CallNumber            CallDateTime  ... CallHour CallMin CallSec
## 193   7473935  P200582467  02/27/2020 02:01:00 PM  ...       14       1       0
## 210   7473878  P200582420  02/27/2020 01:44:00 PM  ...       13      44       0
## 244   7473950  P200582468  02/27/2020 02:00:00 PM  ...       14       0       0
## 245   7473960  P200582391  02/27/2020 01:37:00 PM  ...       13      37       0
## 247   7473900  P200582370  02/27/2020 01:33:00 PM  ...       13      33       0
## 267   7473989  P200582524  02/27/2020 02:17:00 PM  ...       14      17       0
## 271   7473931  P200582373  02/27/2020 01:34:00 PM  ...       13      34       0
## 274   7473818  P200582327  02/27/2020 01:18:00 PM  ...       13      18       0
## 293   7473972  P200582482  02/27/2020 02:05:00 PM  ...       14       5       0
## 339   7473938  P200582443  02/27/2020 01:55:00 PM  ...       13      55       0
## 
## [10 rows x 31 columns]

# One row per Priority with the number of records in it, stored as CallCount.
dfp1 = (
    df.groupby(['Priority'], as_index=False)
    .agg({'RecordID': 'count'})
    .rename(columns={'RecordID': 'CallCount'})
)
# Displayed sorted by volume; the result is not assigned, so dfp1 itself
# keeps its grouped order.
dfp1.sort_values(by=['CallCount'], ascending=False)

##         Priority  CallCount
## 3  Non-Emergency       5448
## 2         Medium        660
## 1            Low        396
## 0           High        180

# Show up to ten rows of the per-priority counts.
dfp1.head(n=10)

##         Priority  CallCount
## 0           High        180
## 1            Low        396
## 2         Medium        660
## 3  Non-Emergency       5448

# Figure and data set-up for the priority pie chart.
matplotlib.style.use('ggplot')
fig = plt.figure(figsize=(8,8))
labels = dfp1.Priority      # slice labels: one per priority level
colors = ['red','green','blue','purple','orange']
mydata = dfp1.CallCount     # slice sizes: number of calls per priority
# Builtin float replaces np.float, an alias deprecated in NumPy 1.20 and
# removed in 1.24 (where it raises AttributeError); the resulting dtype is
# still float64.
explode = np.arange(dfp1.Priority.count(), dtype=float)

# Draw the pie: one slice per priority, annotated with its percentage share.
patches, texts, autotexts = plt.pie(
    mydata,
    labels=labels,
    colors=my_type_colors,
    autopct='%1.1f%%',
    shadow=True,
    startangle=160,
)
# Enlarge the slice (priority) labels.
[slice_label.set_fontsize(18) for slice_label in texts]

## [None, None, None, None]

# Enlarge the percentage texts drawn on the slices.
[pct_text.set_fontsize(18) for pct_text in autotexts]

## [None, None, None, None]

# Draw every percentage text in white.
[pct_text.set_color('white') for pct_text in autotexts]

## [None, None, None, None]

# Give the first slice a larger label and a black percentage text.
first_label, first_pct = texts[0], autotexts[0]
first_label.set_fontsize(20)
first_pct.set_color('Black')
# Equal axis scaling keeps the pie circular.
plt.axis('equal')

## (-1.1006644869279618, 1.1058262695886127, -1.1061446313786636, 1.1050543291946804)

# Title the chart and render it.
plt.title(
    label='911 Calls distribution by Call Priority',
    fontsize=20,
    color='Blue',
)
plt.show()

Python-Assignment-01 Analysis & Visualization of 911 Police calls

Premdutt Gaur

3/28/2020

Data manipulation

Plotting Pie Chart of 911 call distribution by Call Priority