Need Help?

Transform your raw data into actionable insights. Let my expertise in R and advanced data analysis techniques unlock the power of your information. Get a personalized consultation and see how I can streamline your projects, saving you time and driving better decision-making. Contact me today at or visit to schedule a call.

Contact Now

Load the data

# Read the 2016 use-of-force CSV from the working directory into `df`.
df <- read.csv(file = "37-00049_UOF-P_2016_prepped.csv")

The data set is loaded into R and stored as df.

Data Pre Processing

# Number of rows and columns in the raw import.
c(nrow(df), ncol(df))
## [1] 2384   47
# List the column names of the raw import.
colnames(df)
##  [1] "INCIDENT_DATE"                               
##  [2] "INCIDENT_TIME"                               
##  [3] "UOF_NUMBER"                                  
##  [4] "OFFICER_ID"                                  
##  [5] "OFFICER_GENDER"                              
##  [6] "OFFICER_RACE"                                
##  [7] "OFFICER_HIRE_DATE"                           
##  [8] "OFFICER_YEARS_ON_FORCE"                      
##  [9] "OFFICER_INJURY"                              
## [10] "OFFICER_INJURY_TYPE"                         
## [11] "OFFICER_HOSPITALIZATION"                     
## [12] "SUBJECT_ID"                                  
## [13] "SUBJECT_RACE"                                
## [14] "SUBJECT_GENDER"                              
## [15] "SUBJECT_INJURY"                              
## [16] "SUBJECT_INJURY_TYPE"                         
## [17] "SUBJECT_WAS_ARRESTED"                        
## [18] "SUBJECT_DESCRIPTION"                         
## [19] "SUBJECT_OFFENSE"                             
## [20] "REPORTING_AREA"                              
## [21] "BEAT"                                        
## [22] "SECTOR"                                      
## [23] "DIVISION"                                    
## [24] "LOCATION_DISTRICT"                           
## [25] "STREET_NUMBER"                               
## [26] "STREET_NAME"                                 
## [27] "STREET_DIRECTION"                            
## [28] "STREET_TYPE"                                 
## [29] "LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION"
## [30] "LOCATION_CITY"                               
## [31] "LOCATION_STATE"                              
## [32] "LOCATION_LATITUDE"                           
## [33] "LOCATION_LONGITUDE"                          
## [34] "INCIDENT_REASON"                             
## [35] "REASON_FOR_FORCE"                            
## [36] "TYPE_OF_FORCE_USED1"                         
## [37] "TYPE_OF_FORCE_USED2"                         
## [38] "TYPE_OF_FORCE_USED3"                         
## [39] "TYPE_OF_FORCE_USED4"                         
## [40] "TYPE_OF_FORCE_USED5"                         
## [41] "TYPE_OF_FORCE_USED6"                         
## [42] "TYPE_OF_FORCE_USED7"                         
## [43] "TYPE_OF_FORCE_USED8"                         
## [44] "TYPE_OF_FORCE_USED9"                         
## [45] "TYPE_OF_FORCE_USED10"                        
## [46] "NUMBER_EC_CYCLES"                            
## [47] "FORCE_EFFECTIVE"
# Preview the first five rows of the raw import.
head(df, n = 5)
##   INCIDENT_DATE INCIDENT_TIME UOF_NUMBER OFFICER_ID OFFICER_GENDER OFFICER_RACE
## 1    OCCURRED_D    OCCURRED_T     UOFNum CURRENT_BA         OffSex      OffRace
## 2      09-03-16    4:14:00 AM      37702      10810           Male        Black
## 3       3/22/16   11:00:00 PM      33413       7706           Male        White
## 4       5/22/16    1:29:00 PM      34567      11014           Male        Black
## 5      01-10-16    8:55:00 PM      31460       6692           Male        Black
##   OFFICER_HIRE_DATE OFFICER_YEARS_ON_FORCE OFFICER_INJURY
## 1           HIRE_DT    INCIDENT_DATE_LESS_     OFF_INJURE
## 2          05-07-14                      2             No
## 3          01-08-99                     17            Yes
## 4           5/20/15                      1             No
## 5           7/29/91                     24             No
##            OFFICER_INJURY_TYPE OFFICER_HOSPITALIZATION SUBJECT_ID SUBJECT_RACE
## 1              OFF_INJURE_DESC              OFF_HOSPIT     CitNum      CitRace
## 2 No injuries noted or visible                      No      46424        Black
## 3                Sprain/Strain                     Yes      44324     Hispanic
## 4 No injuries noted or visible                      No      45126     Hispanic
## 5 No injuries noted or visible                      No      43150     Hispanic
##   SUBJECT_GENDER SUBJECT_INJURY          SUBJECT_INJURY_TYPE
## 1         CitSex     CIT_INJURE             SUBJ_INJURE_DESC
## 2         Female            Yes      Non-Visible Injury/Pain
## 3           Male             No No injuries noted or visible
## 4           Male             No No injuries noted or visible
## 5           Male            Yes               Laceration/Cut
##   SUBJECT_WAS_ARRESTED SUBJECT_DESCRIPTION SUBJECT_OFFENSE REPORTING_AREA BEAT
## 1           CIT_ARREST          CIT_INFL_A      CitChargeT             RA BEAT
## 2                  Yes   Mentally unstable           APOWW           2062  134
## 3                  Yes   Mentally unstable           APOWW           1197  237
## 4                  Yes             Unknown           APOWW           4153  432
## 5                  Yes FD-Unknown if Armed  Evading Arrest           4523  641
##   SECTOR      DIVISION LOCATION_DISTRICT STREET_NUMBER  STREET_NAME
## 1 SECTOR      DIVISION         DIST_NAME      STREET_N       STREET
## 2    130       CENTRAL               D14           211        Ervay
## 3    230     NORTHEAST                D9          7647     Ferguson
## 4    430     SOUTHWEST                D6           716 bimebella dr
## 5    640 NORTH CENTRAL               D11          5600          LBJ
##   STREET_DIRECTION STREET_TYPE LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION
## 1         street_g    street_t                               Street Address
## 2                N         St.                               211 N ERVAY ST
## 3             NULL         Rd.                             7647 FERGUSON RD
## 4             NULL         Ln.                             716 BIMEBELLA LN
## 5             NULL       Frwy.                               5600 L B J FWY
##   LOCATION_CITY LOCATION_STATE LOCATION_LATITUDE LOCATION_LONGITUDE
## 1          City          State          Latitude          Longitude
## 2        Dallas             TX         32.782205         -96.797461
## 3        Dallas             TX         32.798978         -96.717493
## 4        Dallas             TX          32.73971          -96.92519
## 5        Dallas             TX                                     
##   INCIDENT_REASON REASON_FOR_FORCE   TYPE_OF_FORCE_USED1 TYPE_OF_FORCE_USED2
## 1      SERVICE_TY       UOF_REASON            ForceType1          ForceType2
## 2          Arrest           Arrest Hand/Arm/Elbow Strike                    
## 3          Arrest           Arrest           Joint Locks                    
## 4          Arrest           Arrest     Take Down - Group                    
## 5          Arrest           Arrest        K-9 Deployment                    
##   TYPE_OF_FORCE_USED3 TYPE_OF_FORCE_USED4 TYPE_OF_FORCE_USED5
## 1          ForceType3          ForceType4          ForceType5
## 2                                                            
## 3                                                            
## 4                                                            
## 5                                                            
##   TYPE_OF_FORCE_USED6 TYPE_OF_FORCE_USED7 TYPE_OF_FORCE_USED8
## 1          ForceType6          ForceType7          ForceType8
## 2                                                            
## 3                                                            
## 4                                                            
## 5                                                            
##   TYPE_OF_FORCE_USED9 TYPE_OF_FORCE_USED10 NUMBER_EC_CYCLES FORCE_EFFECTIVE
## 1          ForceType9          ForceType10       Cycles_Num      ForceEffec
## 2                                                      NULL             Yes
## 3                                                      NULL             Yes
## 4                                                      NULL             Yes
## 5                                                      NULL             Yes
# Drop the first data row, which holds a second set of header labels
# rather than an observation, then preview the result.
df <- df[-1, , drop = FALSE]
head(df, n = 5)
##   INCIDENT_DATE INCIDENT_TIME    UOF_NUMBER OFFICER_ID OFFICER_GENDER
## 2      09-03-16    4:14:00 AM         37702      10810           Male
## 3       3/22/16   11:00:00 PM         33413       7706           Male
## 4       5/22/16    1:29:00 PM         34567      11014           Male
## 5      01-10-16    8:55:00 PM         31460       6692           Male
## 6      11-08-16    2:30:00 AM  37879, 37898       9844           Male
##   OFFICER_RACE OFFICER_HIRE_DATE OFFICER_YEARS_ON_FORCE OFFICER_INJURY
## 2        Black          05-07-14                      2             No
## 3        White          01-08-99                     17            Yes
## 4        Black           5/20/15                      1             No
## 5        Black           7/29/91                     24             No
## 6        White          10-04-09                      7             No
##            OFFICER_INJURY_TYPE OFFICER_HOSPITALIZATION SUBJECT_ID SUBJECT_RACE
## 2 No injuries noted or visible                      No      46424        Black
## 3                Sprain/Strain                     Yes      44324     Hispanic
## 4 No injuries noted or visible                      No      45126     Hispanic
## 5 No injuries noted or visible                      No      43150     Hispanic
## 6 No injuries noted or visible                      No      47307        Black
##   SUBJECT_GENDER SUBJECT_INJURY          SUBJECT_INJURY_TYPE
## 2         Female            Yes      Non-Visible Injury/Pain
## 3           Male             No No injuries noted or visible
## 4           Male             No No injuries noted or visible
## 5           Male            Yes               Laceration/Cut
## 6           Male             No No injuries noted or visible
##   SUBJECT_WAS_ARRESTED SUBJECT_DESCRIPTION          SUBJECT_OFFENSE
## 2                  Yes   Mentally unstable                    APOWW
## 3                  Yes   Mentally unstable                    APOWW
## 4                  Yes             Unknown                    APOWW
## 5                  Yes FD-Unknown if Armed           Evading Arrest
## 6                  Yes             Unknown Other Misdemeanor Arrest
##   REPORTING_AREA BEAT SECTOR      DIVISION LOCATION_DISTRICT STREET_NUMBER
## 2           2062  134    130       CENTRAL               D14           211
## 3           1197  237    230     NORTHEAST                D9          7647
## 4           4153  432    430     SOUTHWEST                D6           716
## 5           4523  641    640 NORTH CENTRAL               D11          5600
## 6           2167  346    340     SOUTHEAST                D7          4600
##    STREET_NAME STREET_DIRECTION STREET_TYPE
## 2        Ervay                N         St.
## 3     Ferguson             NULL         Rd.
## 4 bimebella dr             NULL         Ln.
## 5          LBJ             NULL       Frwy.
## 6    Malcolm X                S       Blvd.
##   LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION LOCATION_CITY LOCATION_STATE
## 2                               211 N ERVAY ST        Dallas             TX
## 3                             7647 FERGUSON RD        Dallas             TX
## 4                             716 BIMEBELLA LN        Dallas             TX
## 5                               5600 L B J FWY        Dallas             TX
## 6                        4600 S MALCOLM X BLVD        Dallas             TX
##   LOCATION_LATITUDE LOCATION_LONGITUDE INCIDENT_REASON REASON_FOR_FORCE
## 2         32.782205         -96.797461          Arrest           Arrest
## 3         32.798978         -96.717493          Arrest           Arrest
## 4          32.73971          -96.92519          Arrest           Arrest
## 5                                               Arrest           Arrest
## 6                                               Arrest           Arrest
##     TYPE_OF_FORCE_USED1 TYPE_OF_FORCE_USED2 TYPE_OF_FORCE_USED3
## 2 Hand/Arm/Elbow Strike                                        
## 3           Joint Locks                                        
## 4     Take Down - Group                                        
## 5        K-9 Deployment                                        
## 6        Verbal Command     Take Down - Arm                    
##   TYPE_OF_FORCE_USED4 TYPE_OF_FORCE_USED5 TYPE_OF_FORCE_USED6
## 2                                                            
## 3                                                            
## 4                                                            
## 5                                                            
## 6                                                            
##   TYPE_OF_FORCE_USED7 TYPE_OF_FORCE_USED8 TYPE_OF_FORCE_USED9
## 2                                                            
## 3                                                            
## 4                                                            
## 5                                                            
## 6                                                            
##   TYPE_OF_FORCE_USED10 NUMBER_EC_CYCLES FORCE_EFFECTIVE
## 2                                  NULL             Yes
## 3                                  NULL             Yes
## 4                                  NULL             Yes
## 5                                  NULL             Yes
## 6                                  NULL         No, Yes
# Show the type and first values of every column.
utils::str(df)
## 'data.frame':    2383 obs. of  47 variables:
##  $ INCIDENT_DATE                               : chr  "09-03-16" "3/22/16" "5/22/16" "01-10-16" ...
##  $ INCIDENT_TIME                               : chr  "4:14:00 AM" "11:00:00 PM" "1:29:00 PM" "8:55:00 PM" ...
##  $ UOF_NUMBER                                  : chr  "37702" "33413" "34567" "31460" ...
##  $ OFFICER_ID                                  : chr  "10810" "7706" "11014" "6692" ...
##  $ OFFICER_GENDER                              : chr  "Male" "Male" "Male" "Male" ...
##  $ OFFICER_RACE                                : chr  "Black" "White" "Black" "Black" ...
##  $ OFFICER_HIRE_DATE                           : chr  "05-07-14" "01-08-99" "5/20/15" "7/29/91" ...
##  $ OFFICER_YEARS_ON_FORCE                      : chr  "2" "17" "1" "24" ...
##  $ OFFICER_INJURY                              : chr  "No" "Yes" "No" "No" ...
##  $ OFFICER_INJURY_TYPE                         : chr  "No injuries noted or visible" "Sprain/Strain" "No injuries noted or visible" "No injuries noted or visible" ...
##  $ OFFICER_HOSPITALIZATION                     : chr  "No" "Yes" "No" "No" ...
##  $ SUBJECT_ID                                  : chr  "46424" "44324" "45126" "43150" ...
##  $ SUBJECT_RACE                                : chr  "Black" "Hispanic" "Hispanic" "Hispanic" ...
##  $ SUBJECT_GENDER                              : chr  "Female" "Male" "Male" "Male" ...
##  $ SUBJECT_INJURY                              : chr  "Yes" "No" "No" "Yes" ...
##  $ SUBJECT_INJURY_TYPE                         : chr  "Non-Visible Injury/Pain" "No injuries noted or visible" "No injuries noted or visible" "Laceration/Cut" ...
##  $ SUBJECT_WAS_ARRESTED                        : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ SUBJECT_DESCRIPTION                         : chr  "Mentally unstable" "Mentally unstable" "Unknown" "FD-Unknown if Armed" ...
##  $ SUBJECT_OFFENSE                             : chr  "APOWW" "APOWW" "APOWW" "Evading Arrest" ...
##  $ REPORTING_AREA                              : chr  "2062" "1197" "4153" "4523" ...
##  $ BEAT                                        : chr  "134" "237" "432" "641" ...
##  $ SECTOR                                      : chr  "130" "230" "430" "640" ...
##  $ DIVISION                                    : chr  "CENTRAL" "NORTHEAST" "SOUTHWEST" "NORTH CENTRAL" ...
##  $ LOCATION_DISTRICT                           : chr  "D14" "D9" "D6" "D11" ...
##  $ STREET_NUMBER                               : chr  "211" "7647" "716" "5600" ...
##  $ STREET_NAME                                 : chr  "Ervay" "Ferguson" "bimebella dr" "LBJ" ...
##  $ STREET_DIRECTION                            : chr  "N" "NULL" "NULL" "NULL" ...
##  $ STREET_TYPE                                 : chr  "St." "Rd." "Ln." "Frwy." ...
##  $ LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: chr  "211 N ERVAY ST" "7647 FERGUSON RD" "716 BIMEBELLA LN" "5600 L B J FWY" ...
##  $ LOCATION_CITY                               : chr  "Dallas" "Dallas" "Dallas" "Dallas" ...
##  $ LOCATION_STATE                              : chr  "TX" "TX" "TX" "TX" ...
##  $ LOCATION_LATITUDE                           : chr  "32.782205" "32.798978" "32.73971" "" ...
##  $ LOCATION_LONGITUDE                          : chr  "-96.797461" "-96.717493" "-96.92519" "" ...
##  $ INCIDENT_REASON                             : chr  "Arrest" "Arrest" "Arrest" "Arrest" ...
##  $ REASON_FOR_FORCE                            : chr  "Arrest" "Arrest" "Arrest" "Arrest" ...
##  $ TYPE_OF_FORCE_USED1                         : chr  "Hand/Arm/Elbow Strike" "Joint Locks" "Take Down - Group" "K-9 Deployment" ...
##  $ TYPE_OF_FORCE_USED2                         : chr  "" "" "" "" ...
##  $ TYPE_OF_FORCE_USED3                         : chr  "" "" "" "" ...
##  $ TYPE_OF_FORCE_USED4                         : chr  "" "" "" "" ...
##  $ TYPE_OF_FORCE_USED5                         : chr  "" "" "" "" ...
##  $ TYPE_OF_FORCE_USED6                         : chr  "" "" "" "" ...
##  $ TYPE_OF_FORCE_USED7                         : chr  "" "" "" "" ...
##  $ TYPE_OF_FORCE_USED8                         : chr  "" "" "" "" ...
##  $ TYPE_OF_FORCE_USED9                         : chr  "" "" "" "" ...
##  $ TYPE_OF_FORCE_USED10                        : chr  "" "" "" "" ...
##  $ NUMBER_EC_CYCLES                            : chr  "NULL" "NULL" "NULL" "NULL" ...
##  $ FORCE_EFFECTIVE                             : chr  " Yes" " Yes" " Yes" " Yes" ...
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Convert every character column to a factor, then re-inspect the structure.
# `mutate(across(where(...)))` replaces the superseded `mutate_if()` and
# produces the same result.
df <- df %>%
  mutate(across(where(is.character), as.factor))
str(df)
## 'data.frame':    2383 obs. of  47 variables:
##  $ INCIDENT_DATE                               : Factor w/ 353 levels "01-01-16","01-02-16",..: 98 236 272 10 161 105 178 230 214 203 ...
##  $ INCIDENT_TIME                               : Factor w/ 543 levels "1:00:00 AM","1:00:00 PM",..: 285 96 26 494 203 423 499 453 520 493 ...
##  $ UOF_NUMBER                                  : Factor w/ 2328 levels " 31138, 31139",..: 2227 1804 1939 1626 1440 2150 2298 1782 1870 1587 ...
##  $ OFFICER_ID                                  : Factor w/ 1041 levels "0","10004","10005",..: 288 651 406 562 993 999 1007 828 111 968 ...
##  $ OFFICER_GENDER                              : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 1 2 2 ...
##  $ OFFICER_RACE                                : Factor w/ 6 levels "American Ind",..: 3 6 3 3 6 6 6 3 4 6 ...
##  $ OFFICER_HIRE_DATE                           : Factor w/ 291 levels "01-03-97","01-04-06",..: 53 7 207 239 114 66 66 70 55 5 ...
##  $ OFFICER_YEARS_ON_FORCE                      : Factor w/ 36 levels "0","1","10","11",..: 13 10 2 18 34 34 34 36 31 35 ...
##  $ OFFICER_INJURY                              : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
##  $ OFFICER_INJURY_TYPE                         : Factor w/ 76 levels "Abrasion/Scrape",..: 52 73 52 52 52 52 52 52 52 52 ...
##  $ OFFICER_HOSPITALIZATION                     : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
##  $ SUBJECT_ID                                  : Factor w/ 1433 levels "0","10157","10236",..: 1120 554 786 228 1315 1154 1371 510 385 1419 ...
##  $ SUBJECT_RACE                                : Factor w/ 7 levels "American Ind",..: 3 4 4 4 3 7 3 7 3 7 ...
##  $ SUBJECT_GENDER                              : Factor w/ 4 levels "Female","Male",..: 1 2 2 2 2 1 2 1 1 1 ...
##  $ SUBJECT_INJURY                              : Factor w/ 2 levels "No","Yes": 2 1 1 2 1 1 1 1 2 1 ...
##  $ SUBJECT_INJURY_TYPE                         : Factor w/ 193 levels "Abrasion/Scrape",..: 125 122 122 105 122 122 122 122 1 122 ...
##  $ SUBJECT_WAS_ARRESTED                        : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
##  $ SUBJECT_DESCRIPTION                         : Factor w/ 15 levels "Alchohol","Alchohol and unknown drugs",..: 11 11 14 9 14 14 12 11 11 2 ...
##  $ SUBJECT_OFFENSE                             : Factor w/ 551 levels "APOWW","APOWW, Assault/FV",..: 1 1 1 255 354 35 8 1 1 1 ...
##  $ REPORTING_AREA                              : Factor w/ 576 levels "1001","1003",..: 184 100 365 498 230 63 175 312 191 475 ...
##  $ BEAT                                        : Factor w/ 227 levels "111","112","113",..: 15 53 124 184 100 51 13 146 14 173 ...
##  $ SECTOR                                      : Factor w/ 35 levels "110","120","130",..: 3 8 18 29 14 8 3 21 3 26 ...
##  $ DIVISION                                    : Factor w/ 7 levels "CENTRAL","NORTH CENTRAL",..: 1 3 7 2 6 3 1 4 1 2 ...
##  $ LOCATION_DISTRICT                           : Factor w/ 14 levels "D1","D10","D11",..: 6 14 11 3 12 14 6 11 6 4 ...
##  $ STREET_NUMBER                               : Factor w/ 856 levels "0","100","1000",..: 225 711 677 591 509 95 553 520 351 198 ...
##  $ STREET_NAME                                 : Factor w/ 1080 levels "12th","12TH",..: 298 313 79 536 594 760 23 578 524 252 ...
##  $ STREET_DIRECTION                            : Factor w/ 5 levels "E","N","NULL",..: 2 3 3 3 4 3 2 3 4 3 ...
##  $ STREET_TYPE                                 : Factor w/ 22 levels "Ave","Ave.","Blvd.",..: 20 17 13 10 3 17 20 13 20 15 ...
##  $ LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: Factor w/ 1322 levels "100 E LEDBETTER DR",..: 379 1135 1094 976 849 151 929 866 591 314 ...
##  $ LOCATION_CITY                               : Factor w/ 1 level "Dallas": 1 1 1 1 1 1 1 1 1 1 ...
##  $ LOCATION_STATE                              : Factor w/ 1 level "TX": 1 1 1 1 1 1 1 1 1 1 ...
##  $ LOCATION_LATITUDE                           : Factor w/ 1283 levels "","32.633183",..: 602 758 334 1 1 906 632 782 528 1 ...
##  $ LOCATION_LONGITUDE                          : Factor w/ 1283 levels "","-96.574419",..: 731 244 1276 1 1 161 752 1176 803 1 ...
##  $ INCIDENT_REASON                             : Factor w/ 14 levels "Accidental Discharge",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ REASON_FOR_FORCE                            : Factor w/ 12 levels "Active Aggression",..: 3 3 3 3 3 3 3 3 7 3 ...
##  $ TYPE_OF_FORCE_USED1                         : Factor w/ 29 levels "Baton Display",..: 10 13 24 14 28 9 29 12 4 12 ...
##  $ TYPE_OF_FORCE_USED2                         : Factor w/ 27 levels "","Baton Display",..: 1 1 1 1 20 1 1 1 1 9 ...
##  $ TYPE_OF_FORCE_USED3                         : Factor w/ 25 levels "","Baton Strike/Closed Mode",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ TYPE_OF_FORCE_USED4                         : Factor w/ 23 levels "","Baton Strike/Open Mode",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ TYPE_OF_FORCE_USED5                         : Factor w/ 22 levels "","Baton Display",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ TYPE_OF_FORCE_USED6                         : Factor w/ 18 levels "","BD - Grabbed",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ TYPE_OF_FORCE_USED7                         : Factor w/ 14 levels "","BD - Grabbed",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ TYPE_OF_FORCE_USED8                         : Factor w/ 6 levels "","BD - Grabbed",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ TYPE_OF_FORCE_USED9                         : Factor w/ 2 levels "","Verbal Command": 1 1 1 1 1 1 1 1 1 1 ...
##  $ TYPE_OF_FORCE_USED10                        : Factor w/ 2 levels "","BD - Grabbed": 1 1 1 1 1 1 1 1 1 1 ...
##  $ NUMBER_EC_CYCLES                            : Factor w/ 12 levels " 1, 1"," 2, 4",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ FORCE_EFFECTIVE                             : Factor w/ 104 levels " Limited"," Limited, No",..: 69 69 69 69 42 69 69 69 69 83 ...

The data set contains 47 columns and 2,384 rows as loaded; the column names are listed above. The first data row (the second line of the CSV file) repeats the headers under a second set of column labels, so it was removed before proceeding further, leaving 2,383 observations. To get insight into the structure of the data set, it was inspected with the str function of R. All columns in the data set were in character format and were converted to factors. The data exploration was done using the DataExplorer library.

Data Exploration

library(DataExplorer)
# One-row overview of the data: row/column counts, column kinds,
# missing values and memory usage.
df %>% introduce()
##   rows columns discrete_columns continuous_columns all_missing_columns
## 1 2383      47               47                  0                   0
##   total_missing_values complete_rows total_observations memory_usage
## 1                    0          2383             112001      1505824
# Frequency bar charts for the discrete columns; columns with more than
# 50 categories are skipped and reported in the message below.
plot_bar(data = df)
## 17 columns ignored with more than 50 categories.
## INCIDENT_DATE: 353 categories
## INCIDENT_TIME: 543 categories
## UOF_NUMBER: 2328 categories
## OFFICER_ID: 1041 categories
## OFFICER_HIRE_DATE: 291 categories
## OFFICER_INJURY_TYPE: 76 categories
## SUBJECT_ID: 1433 categories
## SUBJECT_INJURY_TYPE: 193 categories
## SUBJECT_OFFENSE: 551 categories
## REPORTING_AREA: 576 categories
## BEAT: 227 categories
## STREET_NUMBER: 856 categories
## STREET_NAME: 1080 categories
## LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: 1322 categories
## LOCATION_LATITUDE: 1283 categories
## LOCATION_LONGITUDE: 1283 categories
## FORCE_EFFECTIVE: 104 categories

# Visual version of the overview produced by introduce().
plot_intro(data = df)

# Diagram of the data frame's structure.
plot_str(data = df)

The introduce function was used to get an overview of the data, and the results showed that the data contains 47 discrete columns and no missing values (empty strings and the literal text "NULL" are not counted as missing).

Does the gender of the duty officer relate to the use of force?

library(ggplot2)
# Number of records per officer gender.
df %>%
  count(OFFICER_GENDER) %>%
  ggplot(aes(x = OFFICER_GENDER, weight = n)) +
  geom_bar(fill = "#112446") +
  theme_minimal()

# Number of records with and without an officer injury.
df %>%
  count(OFFICER_INJURY) %>%
  ggplot(aes(x = OFFICER_INJURY, weight = n)) +
  geom_bar(fill = "#112446") +
  theme_minimal()

The results showed that female officers appear in fewer of the recorded incidents than male officers. In 2,149 incidents no officer was injured, while 234 incidents led to an officer injury.

Which officers were injured most often?

# Ten officers with the most recorded injuries.
# Only records with OFFICER_INJURY == "Yes" are counted. Without this
# filter the chart shows how often each officer appears in the data,
# not how often that officer was injured.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(OFFICER_ID, sort = TRUE) %>%
  head(10) %>%
  ggplot(aes(x = OFFICER_ID, weight = n)) +
  geom_bar(fill = "#112446") +
  theme_minimal()

Officer 10724 was injured 25 times, officer 10697 was injured 21 times, and officer 10710 was injured 18 times.

Is there any association between officer and subject Race.

# Ten most frequent officer-race / subject-race pairings across all
# records, drawn as dodged bars coloured by subject race.
df %>%
  count(OFFICER_RACE, SUBJECT_RACE, sort = TRUE) %>%
  head(10) %>%
  ggplot(aes(x = OFFICER_RACE, fill = SUBJECT_RACE, weight = n)) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  theme_minimal()

The results showed that there is a link between them: when the officer was white and the subject was Black, an incident occurred 846 times, compared with 287 times when both were white. Fewer incidents were observed for the other race groups.

Is there any association between officer and subject Race in comparison to officer and subject gender.

# Ten most frequent combinations of officer race, subject race and
# officer gender, with one panel per officer gender.
df %>%
  count(OFFICER_RACE, SUBJECT_RACE, OFFICER_GENDER, sort = TRUE) %>%
  head(10) %>%
  ggplot(aes(x = OFFICER_RACE, fill = SUBJECT_RACE, weight = n)) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  theme_minimal() +
  facet_wrap(~ OFFICER_GENDER)

The results showed that male officers were more often involved in incidents leading to injury than female officers.

Officer seniority in comparison with incident injury

# Ten most frequent years-of-service values among records in which the
# officer was injured.
# Changes from the previous version:
#   * OFFICER_INJURY was selected but never used, so every record was
#     counted; the filter restricts the counts to injuries.
#   * OFFICER_YEARS_ON_FORCE is a factor built from text, so its levels
#     sort as "0", "1", "10", "11", ...; the levels are re-ordered by
#     their numeric value so the x axis reads from junior to senior.
#   * position = "dodge" and scale_fill_hue() had no effect because no
#     fill aesthetic is mapped, so they are dropped.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(OFFICER_YEARS_ON_FORCE, sort = TRUE) %>%
  head(10) %>%
  mutate(
    OFFICER_YEARS_ON_FORCE = reorder(
      OFFICER_YEARS_ON_FORCE,
      as.numeric(as.character(OFFICER_YEARS_ON_FORCE))
    )
  ) %>%
  ggplot(aes(x = OFFICER_YEARS_ON_FORCE, weight = n)) +
  geom_bar() +
  theme_minimal()

The results showed that senior officers were injured less often than junior officers. As the experience of the officers increased, incident injuries decreased.

Comparison of injury and injury type with the experience of officers.

# Ten most frequent combinations of officer injury flag, injury type and
# officer gender, shown as horizontal bars coloured by the injury flag.
df %>%
  count(OFFICER_INJURY, OFFICER_INJURY_TYPE, OFFICER_GENDER, sort = TRUE) %>%
  head(10) %>%
  ggplot(aes(x = OFFICER_INJURY_TYPE, fill = OFFICER_INJURY, weight = n)) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  coord_flip() +
  theme_minimal()

The share of incidents in which the officer was not injured was higher in the data set than that of any injury type.

If the officer was injured in the incident, what was the impact on the subject?

# For records where the officer was injured, tabulate each pairing of
# subject injury type and officer injury type, most frequent first.
# The result stays grouped by both columns.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  group_by(SUBJECT_INJURY_TYPE, OFFICER_INJURY_TYPE) %>%
  summarise(n = n(), .groups = "keep") %>%
  arrange(desc(n))
## # A tibble: 139 × 3
## # Groups:   SUBJECT_INJURY_TYPE, OFFICER_INJURY_TYPE [139]
##    SUBJECT_INJURY_TYPE          OFFICER_INJURY_TYPE                 n
##    <fct>                        <fct>                           <int>
##  1 No injuries noted or visible Abrasion/Scrape                    23
##  2 No injuries noted or visible No injuries noted or visible       20
##  3 Abrasion/Scrape              Abrasion/Scrape                    18
##  4 Abrasion/Scrape              No injuries noted or visible        8
##  5 No injuries noted or visible Laceration/Cut                      8
##  6 No injuries noted or visible Sprain/Strain                       7
##  7 No injuries noted or visible Bruise                              5
##  8 No injuries noted or visible Fluid Exposure                      4
##  9 Abrasion/Scrape              Redness/Swelling                    3
## 10 No injuries noted or visible Laceration/Cut, Abrasion/Scrape     3
## # ℹ 129 more rows

There is no clear link between officer injury and subject injury.

Which incident reasons lead to more injuries to officers and subjects?

# Among records where the officer was injured, count each combination of
# subject injury type and incident reason and draw them as dodged
# horizontal bars, one colour per incident reason.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(SUBJECT_INJURY_TYPE, INCIDENT_REASON, sort = TRUE) %>%
  ggplot(
    aes(x = SUBJECT_INJURY_TYPE, fill = INCIDENT_REASON, weight = n)
  ) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  coord_flip() +
  theme_minimal()

On which streets were officers most often injured?

# Thirty street names with the most officer-injury records, plotted as
# jittered points on a flipped axis (street on the vertical axis).
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(STREET_NAME, sort = TRUE) %>%
  head(30) %>%
  ggplot(aes(x = STREET_NAME, y = n)) +
  geom_jitter(size = 1.5) +
  coord_flip() +
  theme_minimal()

Officers were injured 4 times on Ferguson, 4 times on Robert B. Cullum, 4 times on ZANG, and 3 times on Buckner.

Reason behind use of force.

# Thirty most frequent pairings of incident reason and reason for force.
# The result stays grouped by INCIDENT_REASON.
df %>%
  group_by(INCIDENT_REASON, REASON_FOR_FORCE) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  arrange(desc(n)) %>%
  head(30)
## # A tibble: 30 × 3
## # Groups:   INCIDENT_REASON [10]
##    INCIDENT_REASON REASON_FOR_FORCE             n
##    <fct>           <fct>                    <int>
##  1 Arrest          Arrest                     656
##  2 Service Call    Arrest                     214
##  3 Arrest          Active Aggression          200
##  4 Service Call    Danger to self or others   139
##  5 Arrest          Danger to self or others   136
##  6 Service Call    Detention/Frisk            101
##  7 Service Call    Active Aggression           84
##  8 Call for Cover  Arrest                      65
##  9 Service Call    Weapon Display              65
## 10 Arrest          Weapon Display              51
## # ℹ 20 more rows

Most incidents arose from arrests and service calls.

At which locations were officers most often injured?

# Count officer-injury records per longitude/latitude pair.
# The result stays grouped by both coordinate columns and is not sorted
# by count.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  group_by(LOCATION_LONGITUDE, LOCATION_LATITUDE) %>%
  summarise(n = n(), .groups = "keep")
## # A tibble: 179 × 3
## # Groups:   LOCATION_LONGITUDE, LOCATION_LATITUDE [179]
##    LOCATION_LONGITUDE LOCATION_LATITUDE     n
##    <fct>              <fct>             <int>
##  1 ""                 ""                    3
##  2 "-96.575922"       "32.697998"           1
##  3 "-96.639841"       "32.736656"           1
##  4 "-96.653338"       "32.852157"           1
##  5 "-96.656962"       "32.733685"           1
##  6 "-96.657098"       "32.675901"           1
##  7 "-96.662901"       "32.864019"           1
##  8 "-96.6649"         "32.842204"           1
##  9 "-96.664932"       "32.714021"           1
## 10 "-96.665603"       "32.864025"           1
## # ℹ 169 more rows

Officers were injured at the locations listed above; apart from the records with missing coordinates, each location shown appears only once.

Comparison between beat, officer and subject injury, and gender

# Officer-injury records counted per beat, officer gender and injury flag.
# Only the first 30 combinations (in group order, not by count) are drawn,
# as horizontal bars coloured by officer gender.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(BEAT, OFFICER_GENDER, OFFICER_INJURY) %>%
  head(30) %>%
  ggplot(aes(x = BEAT, fill = OFFICER_GENDER, weight = n)) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  coord_flip() +
  theme_minimal()

# Officer-injury records counted per beat, subject gender and subject
# injury flag. Only the first 30 combinations (in group order, not by
# count) are drawn, as horizontal bars coloured by subject gender.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(BEAT, SUBJECT_GENDER, SUBJECT_INJURY) %>%
  head(30) %>%
  ggplot(aes(x = BEAT, fill = SUBJECT_GENDER, weight = n)) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  coord_flip() +
  theme_minimal()