Summary of raw data
# Bonus: location of the raw arrests CSV, read directly from GitHub.
urlfile <- "https://raw.githubusercontent.com/jayleecunysps/AssignmentforSPS/main/Arrests.csv"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Download the CSV at `urlfile` and parse it. read.csv() already returns a
# data frame, so no further data.frame() conversion is needed.
Arrests <- read.csv(url(urlfile))
# Requirement 1: per-column summary of the raw data.
summary(Arrests)
## released colour year age
## Length:5226 Length:5226 Min. :1997 Min. :12.00
## Class :character Class :character 1st Qu.:1998 1st Qu.:18.00
## Mode :character Mode :character Median :2000 Median :21.00
## Mean :2000 Mean :23.85
## 3rd Qu.:2001 3rd Qu.:27.00
## Max. :2002 Max. :66.00
## sex employed citizen checks
## Length:5226 Length:5226 Length:5226 Min. :0.000
## Class :character Class :character Class :character 1st Qu.:0.000
## Mode :character Mode :character Mode :character Median :1.000
## Mean :1.636
## 3rd Qu.:3.000
## Max. :6.000
# Mean of age and checks for each year.
aggregate(cbind(age, checks) ~ year, data = Arrests, FUN = mean)
## year age checks
## 1 1997 23.95528 1.837398
## 2 1998 23.67617 1.664766
## 3 1999 24.23385 1.625114
## 4 2000 23.47559 1.605512
## 5 2001 24.07349 1.582164
## 6 2002 23.36462 1.613718
# Median of age and checks for each year.
aggregate(cbind(age, checks) ~ year, data = Arrests, FUN = median)
## year age checks
## 1 1997 21 2
## 2 1998 21 1
## 3 1999 21 1
## 4 2000 21 1
## 5 2001 21 1
## 6 2002 21 1
Subset data
# Requirement 2: keep only rows with age 18 or older (age > 17) and at least
# one check (checks > 0).
Adultarrests <- subset(Arrests, age > 17 & checks > 0)
# Requirement 2: draw 50 random rows from each colour group. ungroup() drops
# the grouping afterwards so that later steps work on the sample as a whole
# instead of silently inheriting the per-colour grouping.
# NOTE(review): no set.seed() call is visible before this draw, so the sample
# presumably differs between runs -- confirm whether that is intended.
Adultarrests_each100 <- Adultarrests %>%
  group_by(colour) %>%
  slice_sample(n = 50) %>%
  ungroup()
Rename Column
# Requirement 3: replace all eight column names with more descriptive ones.
# Names are assigned by position, so the order below must match the column
# order of the data.
names(Adultarrests_each100) <- c(
  "released",
  "race",
  "arrest_year",
  "arrest_age",
  "sex",
  "employed",
  "citizen",
  "policedatabase_in_records"
)
New Summary
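Because the subset keeps only arrestees aged 18 or older with at least one police database record, the minimum age rises to 18 and the minimum number of police database records rises to 1: no minors and no newly arrested people (those with zero records) are included in the sample. The means and medians increase for the same reason.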
# Requirement 4: per-column summary of the sampled data.
summary(Adultarrests_each100)
## released race arrest_year arrest_age
## Length:100 Length:100 Min. :1997 Min. :18.00
## Class :character Class :character 1st Qu.:1998 1st Qu.:20.00
## Mode :character Mode :character Median :1999 Median :22.50
## Mean :2000 Mean :24.96
## 3rd Qu.:2001 3rd Qu.:28.00
## Max. :2002 Max. :47.00
## sex employed citizen
## Length:100 Length:100 Length:100
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## policedatabase_in_records
## Min. :1.00
## 1st Qu.:1.00
## Median :2.00
## Mean :2.34
## 3rd Qu.:3.00
## Max. :5.00
# Mean arrest age and police database record count for each arrest year.
aggregate(
  cbind(arrest_age, policedatabase_in_records) ~ arrest_year,
  data = Adultarrests_each100,
  FUN = mean
)
## arrest_year arrest_age policedatabase_in_records
## 1 1997 32.75000 2.000000
## 2 1998 22.43478 2.391304
## 3 1999 24.95833 2.500000
## 4 2000 22.94737 2.315789
## 5 2001 26.44000 2.160000
## 6 2002 30.60000 2.600000
# Median arrest age and police database record count for each arrest year.
aggregate(
  cbind(arrest_age, policedatabase_in_records) ~ arrest_year,
  data = Adultarrests_each100,
  FUN = median
)
## arrest_year arrest_age policedatabase_in_records
## 1 1997 32.5 1.5
## 2 1998 21.0 2.0
## 3 1999 23.0 3.0
## 4 2000 21.0 2.0
## 5 2001 23.0 2.0
## 6 2002 30.0 2.0
Rename values
# Requirement 5: relabel the Yes/No flags with descriptive values and store
# them as factors. Any value other than "Yes" or "No" is kept unchanged.
Adultarrests_each100$employed <- as.factor(ifelse(
  Adultarrests_each100$employed == "Yes",
  "Employed",
  ifelse(
    Adultarrests_each100$employed == "No",
    "Unemployed",
    as.character(Adultarrests_each100$employed)
  )
))
Adultarrests_each100$released <- as.factor(ifelse(
  Adultarrests_each100$released == "Yes",
  "Released",
  ifelse(
    Adultarrests_each100$released == "No",
    "Unreleased",
    as.character(Adultarrests_each100$released)
  )
))

# Show 25 randomly chosen rows of the relabelled sample.
sampleoftable <- Adultarrests_each100[
  sample(nrow(Adultarrests_each100), size = 25),
]
print(sampleoftable)
## # A tibble: 25 × 8
## # Groups: race [2]
## released race arrest_year arrest_age sex employed citizen policedatabase_…
## <fct> <chr> <int> <int> <chr> <fct> <chr> <int>
## 1 Released White 1998 18 Male Employed Yes 2
## 2 Released Black 1999 19 Male Employed Yes 3
## 3 Released Black 2001 21 Male Employed Yes 2
## 4 Unrelea… Black 1999 19 Male Unemplo… Yes 4
## 5 Released White 1999 18 Fema… Employed Yes 2
## 6 Released White 2000 39 Male Employed Yes 4
## 7 Released Black 1997 27 Male Employed Yes 2
## 8 Released White 2000 22 Male Unemplo… Yes 1
## 9 Released Black 2001 28 Male Unemplo… Yes 3
## 10 Released Black 1999 24 Male Employed No 2
## # … with 15 more rows