Transform your raw data into actionable insights. Let my expertise in R and advanced data analysis techniques unlock the power of your information. Get a personalized consultation and see how I can streamline your projects, saving you time and driving better decision-making. Contact me today at info@data03.online or visit to schedule a call.
# Read "37-00049_UOF-P_2016_prepped.csv" (resolved against the working
# directory) into the data frame `df` using read.csv()'s default settings.
df <- utils::read.csv(file = "37-00049_UOF-P_2016_prepped.csv")
Load the data set into R and store it as `df`.
# Row count followed by column count of `df`.
c(nrow(df), ncol(df))
## [1] 2384 47
# List the column names of `df`.
colnames(df)
## [1] "INCIDENT_DATE"
## [2] "INCIDENT_TIME"
## [3] "UOF_NUMBER"
## [4] "OFFICER_ID"
## [5] "OFFICER_GENDER"
## [6] "OFFICER_RACE"
## [7] "OFFICER_HIRE_DATE"
## [8] "OFFICER_YEARS_ON_FORCE"
## [9] "OFFICER_INJURY"
## [10] "OFFICER_INJURY_TYPE"
## [11] "OFFICER_HOSPITALIZATION"
## [12] "SUBJECT_ID"
## [13] "SUBJECT_RACE"
## [14] "SUBJECT_GENDER"
## [15] "SUBJECT_INJURY"
## [16] "SUBJECT_INJURY_TYPE"
## [17] "SUBJECT_WAS_ARRESTED"
## [18] "SUBJECT_DESCRIPTION"
## [19] "SUBJECT_OFFENSE"
## [20] "REPORTING_AREA"
## [21] "BEAT"
## [22] "SECTOR"
## [23] "DIVISION"
## [24] "LOCATION_DISTRICT"
## [25] "STREET_NUMBER"
## [26] "STREET_NAME"
## [27] "STREET_DIRECTION"
## [28] "STREET_TYPE"
## [29] "LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION"
## [30] "LOCATION_CITY"
## [31] "LOCATION_STATE"
## [32] "LOCATION_LATITUDE"
## [33] "LOCATION_LONGITUDE"
## [34] "INCIDENT_REASON"
## [35] "REASON_FOR_FORCE"
## [36] "TYPE_OF_FORCE_USED1"
## [37] "TYPE_OF_FORCE_USED2"
## [38] "TYPE_OF_FORCE_USED3"
## [39] "TYPE_OF_FORCE_USED4"
## [40] "TYPE_OF_FORCE_USED5"
## [41] "TYPE_OF_FORCE_USED6"
## [42] "TYPE_OF_FORCE_USED7"
## [43] "TYPE_OF_FORCE_USED8"
## [44] "TYPE_OF_FORCE_USED9"
## [45] "TYPE_OF_FORCE_USED10"
## [46] "NUMBER_EC_CYCLES"
## [47] "FORCE_EFFECTIVE"
# Preview the first five rows of `df`.
head(df, n = 5)
## INCIDENT_DATE INCIDENT_TIME UOF_NUMBER OFFICER_ID OFFICER_GENDER OFFICER_RACE
## 1 OCCURRED_D OCCURRED_T UOFNum CURRENT_BA OffSex OffRace
## 2 09-03-16 4:14:00 AM 37702 10810 Male Black
## 3 3/22/16 11:00:00 PM 33413 7706 Male White
## 4 5/22/16 1:29:00 PM 34567 11014 Male Black
## 5 01-10-16 8:55:00 PM 31460 6692 Male Black
## OFFICER_HIRE_DATE OFFICER_YEARS_ON_FORCE OFFICER_INJURY
## 1 HIRE_DT INCIDENT_DATE_LESS_ OFF_INJURE
## 2 05-07-14 2 No
## 3 01-08-99 17 Yes
## 4 5/20/15 1 No
## 5 7/29/91 24 No
## OFFICER_INJURY_TYPE OFFICER_HOSPITALIZATION SUBJECT_ID SUBJECT_RACE
## 1 OFF_INJURE_DESC OFF_HOSPIT CitNum CitRace
## 2 No injuries noted or visible No 46424 Black
## 3 Sprain/Strain Yes 44324 Hispanic
## 4 No injuries noted or visible No 45126 Hispanic
## 5 No injuries noted or visible No 43150 Hispanic
## SUBJECT_GENDER SUBJECT_INJURY SUBJECT_INJURY_TYPE
## 1 CitSex CIT_INJURE SUBJ_INJURE_DESC
## 2 Female Yes Non-Visible Injury/Pain
## 3 Male No No injuries noted or visible
## 4 Male No No injuries noted or visible
## 5 Male Yes Laceration/Cut
## SUBJECT_WAS_ARRESTED SUBJECT_DESCRIPTION SUBJECT_OFFENSE REPORTING_AREA BEAT
## 1 CIT_ARREST CIT_INFL_A CitChargeT RA BEAT
## 2 Yes Mentally unstable APOWW 2062 134
## 3 Yes Mentally unstable APOWW 1197 237
## 4 Yes Unknown APOWW 4153 432
## 5 Yes FD-Unknown if Armed Evading Arrest 4523 641
## SECTOR DIVISION LOCATION_DISTRICT STREET_NUMBER STREET_NAME
## 1 SECTOR DIVISION DIST_NAME STREET_N STREET
## 2 130 CENTRAL D14 211 Ervay
## 3 230 NORTHEAST D9 7647 Ferguson
## 4 430 SOUTHWEST D6 716 bimebella dr
## 5 640 NORTH CENTRAL D11 5600 LBJ
## STREET_DIRECTION STREET_TYPE LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION
## 1 street_g street_t Street Address
## 2 N St. 211 N ERVAY ST
## 3 NULL Rd. 7647 FERGUSON RD
## 4 NULL Ln. 716 BIMEBELLA LN
## 5 NULL Frwy. 5600 L B J FWY
## LOCATION_CITY LOCATION_STATE LOCATION_LATITUDE LOCATION_LONGITUDE
## 1 City State Latitude Longitude
## 2 Dallas TX 32.782205 -96.797461
## 3 Dallas TX 32.798978 -96.717493
## 4 Dallas TX 32.73971 -96.92519
## 5 Dallas TX
## INCIDENT_REASON REASON_FOR_FORCE TYPE_OF_FORCE_USED1 TYPE_OF_FORCE_USED2
## 1 SERVICE_TY UOF_REASON ForceType1 ForceType2
## 2 Arrest Arrest Hand/Arm/Elbow Strike
## 3 Arrest Arrest Joint Locks
## 4 Arrest Arrest Take Down - Group
## 5 Arrest Arrest K-9 Deployment
## TYPE_OF_FORCE_USED3 TYPE_OF_FORCE_USED4 TYPE_OF_FORCE_USED5
## 1 ForceType3 ForceType4 ForceType5
## 2
## 3
## 4
## 5
## TYPE_OF_FORCE_USED6 TYPE_OF_FORCE_USED7 TYPE_OF_FORCE_USED8
## 1 ForceType6 ForceType7 ForceType8
## 2
## 3
## 4
## 5
## TYPE_OF_FORCE_USED9 TYPE_OF_FORCE_USED10 NUMBER_EC_CYCLES FORCE_EFFECTIVE
## 1 ForceType9 ForceType10 Cycles_Num ForceEffec
## 2 NULL Yes
## 3 NULL Yes
## 4 NULL Yes
## 5 NULL Yes
# Remove the first row, which holds a second set of column labels rather than
# data, then preview the first five remaining rows.
df <- df[-1L, , drop = FALSE]
head(df, n = 5)
## INCIDENT_DATE INCIDENT_TIME UOF_NUMBER OFFICER_ID OFFICER_GENDER
## 2 09-03-16 4:14:00 AM 37702 10810 Male
## 3 3/22/16 11:00:00 PM 33413 7706 Male
## 4 5/22/16 1:29:00 PM 34567 11014 Male
## 5 01-10-16 8:55:00 PM 31460 6692 Male
## 6 11-08-16 2:30:00 AM 37879, 37898 9844 Male
## OFFICER_RACE OFFICER_HIRE_DATE OFFICER_YEARS_ON_FORCE OFFICER_INJURY
## 2 Black 05-07-14 2 No
## 3 White 01-08-99 17 Yes
## 4 Black 5/20/15 1 No
## 5 Black 7/29/91 24 No
## 6 White 10-04-09 7 No
## OFFICER_INJURY_TYPE OFFICER_HOSPITALIZATION SUBJECT_ID SUBJECT_RACE
## 2 No injuries noted or visible No 46424 Black
## 3 Sprain/Strain Yes 44324 Hispanic
## 4 No injuries noted or visible No 45126 Hispanic
## 5 No injuries noted or visible No 43150 Hispanic
## 6 No injuries noted or visible No 47307 Black
## SUBJECT_GENDER SUBJECT_INJURY SUBJECT_INJURY_TYPE
## 2 Female Yes Non-Visible Injury/Pain
## 3 Male No No injuries noted or visible
## 4 Male No No injuries noted or visible
## 5 Male Yes Laceration/Cut
## 6 Male No No injuries noted or visible
## SUBJECT_WAS_ARRESTED SUBJECT_DESCRIPTION SUBJECT_OFFENSE
## 2 Yes Mentally unstable APOWW
## 3 Yes Mentally unstable APOWW
## 4 Yes Unknown APOWW
## 5 Yes FD-Unknown if Armed Evading Arrest
## 6 Yes Unknown Other Misdemeanor Arrest
## REPORTING_AREA BEAT SECTOR DIVISION LOCATION_DISTRICT STREET_NUMBER
## 2 2062 134 130 CENTRAL D14 211
## 3 1197 237 230 NORTHEAST D9 7647
## 4 4153 432 430 SOUTHWEST D6 716
## 5 4523 641 640 NORTH CENTRAL D11 5600
## 6 2167 346 340 SOUTHEAST D7 4600
## STREET_NAME STREET_DIRECTION STREET_TYPE
## 2 Ervay N St.
## 3 Ferguson NULL Rd.
## 4 bimebella dr NULL Ln.
## 5 LBJ NULL Frwy.
## 6 Malcolm X S Blvd.
## LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION LOCATION_CITY LOCATION_STATE
## 2 211 N ERVAY ST Dallas TX
## 3 7647 FERGUSON RD Dallas TX
## 4 716 BIMEBELLA LN Dallas TX
## 5 5600 L B J FWY Dallas TX
## 6 4600 S MALCOLM X BLVD Dallas TX
## LOCATION_LATITUDE LOCATION_LONGITUDE INCIDENT_REASON REASON_FOR_FORCE
## 2 32.782205 -96.797461 Arrest Arrest
## 3 32.798978 -96.717493 Arrest Arrest
## 4 32.73971 -96.92519 Arrest Arrest
## 5 Arrest Arrest
## 6 Arrest Arrest
## TYPE_OF_FORCE_USED1 TYPE_OF_FORCE_USED2 TYPE_OF_FORCE_USED3
## 2 Hand/Arm/Elbow Strike
## 3 Joint Locks
## 4 Take Down - Group
## 5 K-9 Deployment
## 6 Verbal Command Take Down - Arm
## TYPE_OF_FORCE_USED4 TYPE_OF_FORCE_USED5 TYPE_OF_FORCE_USED6
## 2
## 3
## 4
## 5
## 6
## TYPE_OF_FORCE_USED7 TYPE_OF_FORCE_USED8 TYPE_OF_FORCE_USED9
## 2
## 3
## 4
## 5
## 6
## TYPE_OF_FORCE_USED10 NUMBER_EC_CYCLES FORCE_EFFECTIVE
## 2 NULL Yes
## 3 NULL Yes
## 4 NULL Yes
## 5 NULL Yes
## 6 NULL No, Yes
# Show each column's type and leading values.
str(object = df)
## 'data.frame': 2383 obs. of 47 variables:
## $ INCIDENT_DATE : chr "09-03-16" "3/22/16" "5/22/16" "01-10-16" ...
## $ INCIDENT_TIME : chr "4:14:00 AM" "11:00:00 PM" "1:29:00 PM" "8:55:00 PM" ...
## $ UOF_NUMBER : chr "37702" "33413" "34567" "31460" ...
## $ OFFICER_ID : chr "10810" "7706" "11014" "6692" ...
## $ OFFICER_GENDER : chr "Male" "Male" "Male" "Male" ...
## $ OFFICER_RACE : chr "Black" "White" "Black" "Black" ...
## $ OFFICER_HIRE_DATE : chr "05-07-14" "01-08-99" "5/20/15" "7/29/91" ...
## $ OFFICER_YEARS_ON_FORCE : chr "2" "17" "1" "24" ...
## $ OFFICER_INJURY : chr "No" "Yes" "No" "No" ...
## $ OFFICER_INJURY_TYPE : chr "No injuries noted or visible" "Sprain/Strain" "No injuries noted or visible" "No injuries noted or visible" ...
## $ OFFICER_HOSPITALIZATION : chr "No" "Yes" "No" "No" ...
## $ SUBJECT_ID : chr "46424" "44324" "45126" "43150" ...
## $ SUBJECT_RACE : chr "Black" "Hispanic" "Hispanic" "Hispanic" ...
## $ SUBJECT_GENDER : chr "Female" "Male" "Male" "Male" ...
## $ SUBJECT_INJURY : chr "Yes" "No" "No" "Yes" ...
## $ SUBJECT_INJURY_TYPE : chr "Non-Visible Injury/Pain" "No injuries noted or visible" "No injuries noted or visible" "Laceration/Cut" ...
## $ SUBJECT_WAS_ARRESTED : chr "Yes" "Yes" "Yes" "Yes" ...
## $ SUBJECT_DESCRIPTION : chr "Mentally unstable" "Mentally unstable" "Unknown" "FD-Unknown if Armed" ...
## $ SUBJECT_OFFENSE : chr "APOWW" "APOWW" "APOWW" "Evading Arrest" ...
## $ REPORTING_AREA : chr "2062" "1197" "4153" "4523" ...
## $ BEAT : chr "134" "237" "432" "641" ...
## $ SECTOR : chr "130" "230" "430" "640" ...
## $ DIVISION : chr "CENTRAL" "NORTHEAST" "SOUTHWEST" "NORTH CENTRAL" ...
## $ LOCATION_DISTRICT : chr "D14" "D9" "D6" "D11" ...
## $ STREET_NUMBER : chr "211" "7647" "716" "5600" ...
## $ STREET_NAME : chr "Ervay" "Ferguson" "bimebella dr" "LBJ" ...
## $ STREET_DIRECTION : chr "N" "NULL" "NULL" "NULL" ...
## $ STREET_TYPE : chr "St." "Rd." "Ln." "Frwy." ...
## $ LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: chr "211 N ERVAY ST" "7647 FERGUSON RD" "716 BIMEBELLA LN" "5600 L B J FWY" ...
## $ LOCATION_CITY : chr "Dallas" "Dallas" "Dallas" "Dallas" ...
## $ LOCATION_STATE : chr "TX" "TX" "TX" "TX" ...
## $ LOCATION_LATITUDE : chr "32.782205" "32.798978" "32.73971" "" ...
## $ LOCATION_LONGITUDE : chr "-96.797461" "-96.717493" "-96.92519" "" ...
## $ INCIDENT_REASON : chr "Arrest" "Arrest" "Arrest" "Arrest" ...
## $ REASON_FOR_FORCE : chr "Arrest" "Arrest" "Arrest" "Arrest" ...
## $ TYPE_OF_FORCE_USED1 : chr "Hand/Arm/Elbow Strike" "Joint Locks" "Take Down - Group" "K-9 Deployment" ...
## $ TYPE_OF_FORCE_USED2 : chr "" "" "" "" ...
## $ TYPE_OF_FORCE_USED3 : chr "" "" "" "" ...
## $ TYPE_OF_FORCE_USED4 : chr "" "" "" "" ...
## $ TYPE_OF_FORCE_USED5 : chr "" "" "" "" ...
## $ TYPE_OF_FORCE_USED6 : chr "" "" "" "" ...
## $ TYPE_OF_FORCE_USED7 : chr "" "" "" "" ...
## $ TYPE_OF_FORCE_USED8 : chr "" "" "" "" ...
## $ TYPE_OF_FORCE_USED9 : chr "" "" "" "" ...
## $ TYPE_OF_FORCE_USED10 : chr "" "" "" "" ...
## $ NUMBER_EC_CYCLES : chr "NULL" "NULL" "NULL" "NULL" ...
## $ FORCE_EFFECTIVE : chr " Yes" " Yes" " Yes" " Yes" ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Convert every character column to a factor. across() + where() replaces the
# superseded mutate_if(). The placeholder strings "" and "NULL" are kept as
# ordinary factor levels here; they are not converted to NA.
df <- df %>%
  mutate(across(where(is.character), as.factor))
str(object = df)
## 'data.frame': 2383 obs. of 47 variables:
## $ INCIDENT_DATE : Factor w/ 353 levels "01-01-16","01-02-16",..: 98 236 272 10 161 105 178 230 214 203 ...
## $ INCIDENT_TIME : Factor w/ 543 levels "1:00:00 AM","1:00:00 PM",..: 285 96 26 494 203 423 499 453 520 493 ...
## $ UOF_NUMBER : Factor w/ 2328 levels " 31138, 31139",..: 2227 1804 1939 1626 1440 2150 2298 1782 1870 1587 ...
## $ OFFICER_ID : Factor w/ 1041 levels "0","10004","10005",..: 288 651 406 562 993 999 1007 828 111 968 ...
## $ OFFICER_GENDER : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 1 2 2 ...
## $ OFFICER_RACE : Factor w/ 6 levels "American Ind",..: 3 6 3 3 6 6 6 3 4 6 ...
## $ OFFICER_HIRE_DATE : Factor w/ 291 levels "01-03-97","01-04-06",..: 53 7 207 239 114 66 66 70 55 5 ...
## $ OFFICER_YEARS_ON_FORCE : Factor w/ 36 levels "0","1","10","11",..: 13 10 2 18 34 34 34 36 31 35 ...
## $ OFFICER_INJURY : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## $ OFFICER_INJURY_TYPE : Factor w/ 76 levels "Abrasion/Scrape",..: 52 73 52 52 52 52 52 52 52 52 ...
## $ OFFICER_HOSPITALIZATION : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## $ SUBJECT_ID : Factor w/ 1433 levels "0","10157","10236",..: 1120 554 786 228 1315 1154 1371 510 385 1419 ...
## $ SUBJECT_RACE : Factor w/ 7 levels "American Ind",..: 3 4 4 4 3 7 3 7 3 7 ...
## $ SUBJECT_GENDER : Factor w/ 4 levels "Female","Male",..: 1 2 2 2 2 1 2 1 1 1 ...
## $ SUBJECT_INJURY : Factor w/ 2 levels "No","Yes": 2 1 1 2 1 1 1 1 2 1 ...
## $ SUBJECT_INJURY_TYPE : Factor w/ 193 levels "Abrasion/Scrape",..: 125 122 122 105 122 122 122 122 1 122 ...
## $ SUBJECT_WAS_ARRESTED : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ SUBJECT_DESCRIPTION : Factor w/ 15 levels "Alchohol","Alchohol and unknown drugs",..: 11 11 14 9 14 14 12 11 11 2 ...
## $ SUBJECT_OFFENSE : Factor w/ 551 levels "APOWW","APOWW, Assault/FV",..: 1 1 1 255 354 35 8 1 1 1 ...
## $ REPORTING_AREA : Factor w/ 576 levels "1001","1003",..: 184 100 365 498 230 63 175 312 191 475 ...
## $ BEAT : Factor w/ 227 levels "111","112","113",..: 15 53 124 184 100 51 13 146 14 173 ...
## $ SECTOR : Factor w/ 35 levels "110","120","130",..: 3 8 18 29 14 8 3 21 3 26 ...
## $ DIVISION : Factor w/ 7 levels "CENTRAL","NORTH CENTRAL",..: 1 3 7 2 6 3 1 4 1 2 ...
## $ LOCATION_DISTRICT : Factor w/ 14 levels "D1","D10","D11",..: 6 14 11 3 12 14 6 11 6 4 ...
## $ STREET_NUMBER : Factor w/ 856 levels "0","100","1000",..: 225 711 677 591 509 95 553 520 351 198 ...
## $ STREET_NAME : Factor w/ 1080 levels "12th","12TH",..: 298 313 79 536 594 760 23 578 524 252 ...
## $ STREET_DIRECTION : Factor w/ 5 levels "E","N","NULL",..: 2 3 3 3 4 3 2 3 4 3 ...
## $ STREET_TYPE : Factor w/ 22 levels "Ave","Ave.","Blvd.",..: 20 17 13 10 3 17 20 13 20 15 ...
## $ LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: Factor w/ 1322 levels "100 E LEDBETTER DR",..: 379 1135 1094 976 849 151 929 866 591 314 ...
## $ LOCATION_CITY : Factor w/ 1 level "Dallas": 1 1 1 1 1 1 1 1 1 1 ...
## $ LOCATION_STATE : Factor w/ 1 level "TX": 1 1 1 1 1 1 1 1 1 1 ...
## $ LOCATION_LATITUDE : Factor w/ 1283 levels "","32.633183",..: 602 758 334 1 1 906 632 782 528 1 ...
## $ LOCATION_LONGITUDE : Factor w/ 1283 levels "","-96.574419",..: 731 244 1276 1 1 161 752 1176 803 1 ...
## $ INCIDENT_REASON : Factor w/ 14 levels "Accidental Discharge",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ REASON_FOR_FORCE : Factor w/ 12 levels "Active Aggression",..: 3 3 3 3 3 3 3 3 7 3 ...
## $ TYPE_OF_FORCE_USED1 : Factor w/ 29 levels "Baton Display",..: 10 13 24 14 28 9 29 12 4 12 ...
## $ TYPE_OF_FORCE_USED2 : Factor w/ 27 levels "","Baton Display",..: 1 1 1 1 20 1 1 1 1 9 ...
## $ TYPE_OF_FORCE_USED3 : Factor w/ 25 levels "","Baton Strike/Closed Mode",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ TYPE_OF_FORCE_USED4 : Factor w/ 23 levels "","Baton Strike/Open Mode",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ TYPE_OF_FORCE_USED5 : Factor w/ 22 levels "","Baton Display",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ TYPE_OF_FORCE_USED6 : Factor w/ 18 levels "","BD - Grabbed",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ TYPE_OF_FORCE_USED7 : Factor w/ 14 levels "","BD - Grabbed",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ TYPE_OF_FORCE_USED8 : Factor w/ 6 levels "","BD - Grabbed",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ TYPE_OF_FORCE_USED9 : Factor w/ 2 levels "","Verbal Command": 1 1 1 1 1 1 1 1 1 1 ...
## $ TYPE_OF_FORCE_USED10 : Factor w/ 2 levels "","BD - Grabbed": 1 1 1 1 1 1 1 1 1 1 ...
## $ NUMBER_EC_CYCLES : Factor w/ 12 levels " 1, 1"," 2, 4",..: 12 12 12 12 12 12 12 12 12 12 ...
## $ FORCE_EFFECTIVE : Factor w/ 104 levels " Limited"," Limited, No",..: 69 69 69 69 42 69 69 69 69 83 ...
The data set contains 47 columns and 2384 rows as loaded; the column names are listed above. The first data row only repeats a second set of column labels, so it is removed before proceeding, leaving 2383 observations. The structure of the data set was then inspected with R's `str()` function: every column was read in character format. Further data exploration was done with the DataExplorer library.
library(DataExplorer)
# Summarise row/column counts, column types, and missing-value totals of `df`.
introduce(data = df)
## rows columns discrete_columns continuous_columns all_missing_columns
## 1 2383 47 47 0 0
## total_missing_values complete_rows total_observations memory_usage
## 1 0 2383 112001 1505824
# Bar charts of the categorical columns of `df`; columns with more than 50
# categories are skipped and listed in the message that follows.
plot_bar(data = df)
## 17 columns ignored with more than 50 categories.
## INCIDENT_DATE: 353 categories
## INCIDENT_TIME: 543 categories
## UOF_NUMBER: 2328 categories
## OFFICER_ID: 1041 categories
## OFFICER_HIRE_DATE: 291 categories
## OFFICER_INJURY_TYPE: 76 categories
## SUBJECT_ID: 1433 categories
## SUBJECT_INJURY_TYPE: 193 categories
## SUBJECT_OFFENSE: 551 categories
## REPORTING_AREA: 576 categories
## BEAT: 227 categories
## STREET_NUMBER: 856 categories
## STREET_NAME: 1080 categories
## LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION: 1322 categories
## LOCATION_LATITUDE: 1283 categories
## LOCATION_LONGITUDE: 1283 categories
## FORCE_EFFECTIVE: 104 categories
# DataExplorer overview plots for `df`.
plot_intro(data = df)
plot_str(data = df)
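The `introduce()` function was used to get an overview of the data: it reports 47 discrete columns and no missing values. Note that empty strings and the literal text "NULL" are stored as ordinary values, so they are not counted as missing.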
library(ggplot2)
# Number of records per officer gender.
df %>%
  count(OFFICER_GENDER) %>%
  ggplot(mapping = aes(x = OFFICER_GENDER, weight = n)) +
  geom_bar(fill = "#112446") +
  theme_minimal()
# Number of records per officer-injury outcome.
df %>%
  count(OFFICER_INJURY) %>%
  ggplot(mapping = aes(x = OFFICER_INJURY, weight = n)) +
  geom_bar(fill = "#112446") +
  theme_minimal()
The results showed that female officers were involved in fewer incidents than male officers. In 2149 incidents no officer was injured, while 234 incidents led to an officer injury.
# Ten officer IDs with the most records. All records are counted; there is no
# filter on OFFICER_INJURY in this pipeline.
df %>%
  count(OFFICER_ID) %>%
  arrange(desc(n)) %>%
  head(n = 10) %>%
  ggplot(mapping = aes(x = OFFICER_ID, weight = n)) +
  geom_bar(fill = "#112446") +
  theme_minimal()
Officer ID 10724 appears in 25 records, 10697 in 21, and 10710 in 18. These counts cover all incidents for each officer, since the code does not filter on officer injury.
# Record counts per officer race, largest first, capped at ten rows.
df %>%
  count(OFFICER_RACE) %>%
  arrange(desc(n)) %>%
  head(n = 10) %>%
  ggplot(mapping = aes(x = OFFICER_RACE, weight = n)) +
  geom_bar(fill = "#112446") +
  theme_minimal()
The results showed that White officers were involved in the most incidents, followed by Hispanic and Black officers.
# Ten most frequent officer-race / subject-race pairings, drawn as dodged bars
# coloured by subject race.
df %>%
  count(OFFICER_RACE, SUBJECT_RACE) %>%
  arrange(desc(n)) %>%
  head(n = 10) %>%
  ggplot(
    mapping = aes(x = OFFICER_RACE, fill = SUBJECT_RACE, weight = n)
  ) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  theme_minimal()
The results show a pattern in the officer/subject race pairings: the most frequent combination was a White officer with a Black subject (846 incidents), compared with 287 incidents in which both were White. The other race combinations occurred less often.
# Ten most frequent officer-race / subject-race / officer-gender combinations,
# with one panel per officer gender.
df %>%
  count(OFFICER_RACE, SUBJECT_RACE, OFFICER_GENDER) %>%
  arrange(desc(n)) %>%
  head(n = 10) %>%
  ggplot(
    mapping = aes(x = OFFICER_RACE, fill = SUBJECT_RACE, weight = n)
  ) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  theme_minimal() +
  facet_wrap(vars(OFFICER_GENDER))
The results showed that male officers were involved in more incidents than female officers.
# Ten most frequent years-on-force values and their record counts.
# OFFICER_YEARS_ON_FORCE was read as text, so its factor levels sort
# lexicographically ("0", "1", "10", "11", ...). After picking the top ten,
# rebuild the factor from the integer values so the x axis is in numeric order.
# Any value that is not a plain integer would become NA with a coercion warning.
# The original position = "dodge" and scale_fill_hue() were dropped: with no
# fill aesthetic and one bar per x value they had no effect.
df %>%
  count(OFFICER_YEARS_ON_FORCE) %>%
  arrange(desc(n)) %>%
  head(n = 10) %>%
  mutate(
    OFFICER_YEARS_ON_FORCE = factor(
      as.integer(as.character(OFFICER_YEARS_ON_FORCE))
    )
  ) %>%
  ggplot(mapping = aes(x = OFFICER_YEARS_ON_FORCE, weight = n)) +
  geom_bar() +
  theme_minimal()
The results showed that senior officers were involved in fewer incidents than junior officers: as officers' years of experience increased, the number of incidents decreased.
# Ten most frequent injury-flag / injury-type / officer-gender combinations,
# shown as horizontal bars coloured by the injury flag.
df %>%
  count(OFFICER_INJURY, OFFICER_INJURY_TYPE, OFFICER_GENDER) %>%
  arrange(desc(n)) %>%
  head(n = 10) %>%
  ggplot(
    mapping = aes(x = OFFICER_INJURY_TYPE, fill = OFFICER_INJURY, weight = n)
  ) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  theme_minimal() +
  coord_flip()
Records in which the officer was not injured were more common in the data set than any injury type.
# For records where the officer was injured, tabulate subject injury type
# against officer injury type, most frequent pairing first.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  group_by(SUBJECT_INJURY_TYPE, OFFICER_INJURY_TYPE) %>%
  count() %>%
  arrange(desc(n))
## # A tibble: 139 × 3
## # Groups: SUBJECT_INJURY_TYPE, OFFICER_INJURY_TYPE [139]
## SUBJECT_INJURY_TYPE OFFICER_INJURY_TYPE n
## <fct> <fct> <int>
## 1 No injuries noted or visible Abrasion/Scrape 23
## 2 No injuries noted or visible No injuries noted or visible 20
## 3 Abrasion/Scrape Abrasion/Scrape 18
## 4 Abrasion/Scrape No injuries noted or visible 8
## 5 No injuries noted or visible Laceration/Cut 8
## 6 No injuries noted or visible Sprain/Strain 7
## 7 No injuries noted or visible Bruise 5
## 8 No injuries noted or visible Fluid Exposure 4
## 9 Abrasion/Scrape Redness/Swelling 3
## 10 No injuries noted or visible Laceration/Cut, Abrasion/Scrape 3
## # ℹ 129 more rows
There is no clear link between the officer's injury type and the subject's injury type.
# For records where the officer was injured, count subject injury types per
# incident reason and draw them as horizontal dodged bars.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(SUBJECT_INJURY_TYPE, INCIDENT_REASON) %>%
  arrange(desc(n)) %>%
  ggplot(
    mapping = aes(
      x = SUBJECT_INJURY_TYPE,
      fill = INCIDENT_REASON,
      weight = n
    )
  ) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  coord_flip() +
  theme_minimal()
# Thirty street names with the most officer-injury records, one point per
# street at its exact count. geom_point() replaces geom_jitter(): jitter adds
# random noise to both coordinates, so the integer counts were drawn off their
# true values and the plot changed from run to run.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(STREET_NAME) %>%
  arrange(desc(n)) %>%
  head(n = 30) %>%
  ggplot(mapping = aes(x = STREET_NAME, y = n)) +
  geom_point(size = 1.5) +
  coord_flip() +
  theme_minimal()
Officer injuries were most frequent on Ferguson (4), Robert B. Cullum (4), ZANG (4), and Buckner (3).
# Thirty most frequent incident-reason / reason-for-force pairings.
df %>%
  group_by(INCIDENT_REASON) %>%
  count(REASON_FOR_FORCE) %>%
  arrange(desc(n)) %>%
  head(n = 30)
## # A tibble: 30 × 3
## # Groups: INCIDENT_REASON [10]
## INCIDENT_REASON REASON_FOR_FORCE n
## <fct> <fct> <int>
## 1 Arrest Arrest 656
## 2 Service Call Arrest 214
## 3 Arrest Active Aggression 200
## 4 Service Call Danger to self or others 139
## 5 Arrest Danger to self or others 136
## 6 Service Call Detention/Frisk 101
## 7 Service Call Active Aggression 84
## 8 Call for Cover Arrest 65
## 9 Service Call Weapon Display 65
## 10 Arrest Weapon Display 51
## # ℹ 20 more rows
Most incidents arose from arrests and service calls.
# Count officer-injury records per longitude/latitude pair. Records with empty
# coordinates form their own group.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  group_by(LOCATION_LONGITUDE, LOCATION_LATITUDE) %>%
  count()
## # A tibble: 179 × 3
## # Groups: LOCATION_LONGITUDE, LOCATION_LATITUDE [179]
## LOCATION_LONGITUDE LOCATION_LATITUDE n
## <fct> <fct> <int>
## 1 "" "" 3
## 2 "-96.575922" "32.697998" 1
## 3 "-96.639841" "32.736656" 1
## 4 "-96.653338" "32.852157" 1
## 5 "-96.656962" "32.733685" 1
## 6 "-96.657098" "32.675901" 1
## 7 "-96.662901" "32.864019" 1
## 8 "-96.6649" "32.842204" 1
## 9 "-96.664932" "32.714021" 1
## 10 "-96.665603" "32.864025" 1
## # ℹ 169 more rows
The table lists the coordinates of the incidents in which an officer was injured; three of those records have no coordinates.
# Officer-injury records per beat and officer gender, limited to the 30 largest
# groups. arrange(desc(n)) was added before head(): without it, head(30) kept
# the first 30 rows in BEAT level order rather than the most frequent groups,
# unlike the other top-N summaries in this script.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(BEAT, OFFICER_GENDER, OFFICER_INJURY) %>%
  arrange(desc(n)) %>%
  head(n = 30) %>%
  ggplot(mapping = aes(x = BEAT, fill = OFFICER_GENDER, weight = n)) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  coord_flip() +
  theme_minimal()
# Officer-injury records per beat, subject gender, and subject-injury flag,
# limited to the 30 largest groups and coloured by subject gender.
# arrange(desc(n)) was added before head(): without it, head(30) kept the
# first 30 rows in BEAT level order rather than the most frequent groups,
# unlike the other top-N summaries in this script.
df %>%
  filter(OFFICER_INJURY == "Yes") %>%
  count(BEAT, SUBJECT_GENDER, SUBJECT_INJURY) %>%
  arrange(desc(n)) %>%
  head(n = 30) %>%
  ggplot(mapping = aes(x = BEAT, fill = SUBJECT_GENDER, weight = n)) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  coord_flip() +
  theme_minimal()