Requirement1

Summary of raw data

# Bonus: the arrests data is read from a CSV file hosted on GitHub.
urlfile <- "https://raw.githubusercontent.com/jayleecunysps/AssignmentforSPS/main/Arrests.csv"

library(dplyr) 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Requirement 1: download the arrests CSV and print per-column summary
# statistics. read.csv() already returns a data.frame, so the result is used
# directly without a further data.frame() conversion.
Arrests <- read.csv(file = url(urlfile))
summary(Arrests)
##    released            colour               year           age       
##  Length:5226        Length:5226        Min.   :1997   Min.   :12.00  
##  Class :character   Class :character   1st Qu.:1998   1st Qu.:18.00  
##  Mode  :character   Mode  :character   Median :2000   Median :21.00  
##                                        Mean   :2000   Mean   :23.85  
##                                        3rd Qu.:2001   3rd Qu.:27.00  
##                                        Max.   :2002   Max.   :66.00  
##      sex              employed           citizen              checks     
##  Length:5226        Length:5226        Length:5226        Min.   :0.000  
##  Class :character   Class :character   Class :character   1st Qu.:0.000  
##  Mode  :character   Mode  :character   Mode  :character   Median :1.000  
##                                                           Mean   :1.636  
##                                                           3rd Qu.:3.000  
##                                                           Max.   :6.000
# Mean age and mean number of checks for each year.
aggregate(cbind(age, checks) ~ year, data = Arrests, FUN = mean)
##   year      age   checks
## 1 1997 23.95528 1.837398
## 2 1998 23.67617 1.664766
## 3 1999 24.23385 1.625114
## 4 2000 23.47559 1.605512
## 5 2001 24.07349 1.582164
## 6 2002 23.36462 1.613718
# Median age and median number of checks for each year.
aggregate(cbind(age, checks) ~ year, data = Arrests, FUN = median)
##   year age checks
## 1 1997  21      2
## 2 1998  21      1
## 3 1999  21      1
## 4 2000  21      1
## 5 2001  21      1
## 6 2002  21      1

Requirement2

Subset data

  1. Keep only arrestees who are adults (age 18 or older) and who already appear in at least one police database (checks > 0).
  2. Draw an equal number of arrestees from each race.
# Requirement 2: keep adults (age 18 or older) whose `checks` count is above
# zero.
Adultarrests <- subset(Arrests, age > 17 & checks > 0)
# Draw 50 rows per `colour` value (two groups in this data, hence 100 rows in
# total). ungroup() drops the grouping afterwards so that later steps operate
# on a plain tibble instead of silently inheriting the per-colour grouping.
# NOTE(review): no seed is set in the visible code, so the sample differs on
# every run -- confirm whether reproducibility is required.
Adultarrests_each100 <- Adultarrests %>%
  group_by(colour) %>%
  slice_sample(n = 50) %>%
  ungroup()

Requirement3

Rename Column

# Requirement 3: give the columns descriptive names.
# Columns are renamed by name (new = old) rather than by position, so a change
# in the upstream column order cannot silently attach a label to the wrong
# column. Columns that are not listed keep their current names.
Adultarrests_each100 <- Adultarrests_each100 %>%
  rename(
    race = colour,
    arrest_year = year,
    arrest_age = age,
    policedatabase_in_records = checks
  )

#requirement3

Requirement4

New Summary

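Because the subset keeps only adults who already appear in at least one police database, the minimum of `policedatabase_in_records` rises to 1 and the minimum of `arrest_age` rises to 18: no minors and no arrestees without a prior database record are included in the sample. The means and medians of both columns increase for the same reason.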

# Requirement 4: per-column summary statistics of the renamed adult sample.
Adultarrests_each100 %>% summary()
##    released             race            arrest_year     arrest_age   
##  Length:100         Length:100         Min.   :1997   Min.   :18.00  
##  Class :character   Class :character   1st Qu.:1998   1st Qu.:20.00  
##  Mode  :character   Mode  :character   Median :1999   Median :22.50  
##                                        Mean   :2000   Mean   :24.96  
##                                        3rd Qu.:2001   3rd Qu.:28.00  
##                                        Max.   :2002   Max.   :47.00  
##      sex              employed           citizen         
##  Length:100         Length:100         Length:100        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##  policedatabase_in_records
##  Min.   :1.00             
##  1st Qu.:1.00             
##  Median :2.00             
##  Mean   :2.34             
##  3rd Qu.:3.00             
##  Max.   :5.00
# Mean arrest age and mean police-database record count per arrest year.
aggregate(
  cbind(arrest_age, policedatabase_in_records) ~ arrest_year,
  data = Adultarrests_each100,
  FUN = mean
)
##   arrest_year arrest_age policedatabase_in_records
## 1        1997   32.75000                  2.000000
## 2        1998   22.43478                  2.391304
## 3        1999   24.95833                  2.500000
## 4        2000   22.94737                  2.315789
## 5        2001   26.44000                  2.160000
## 6        2002   30.60000                  2.600000
# Median arrest age and median police-database record count per arrest year.
aggregate(
  cbind(arrest_age, policedatabase_in_records) ~ arrest_year,
  data = Adultarrests_each100,
  FUN = median
)
##   arrest_year arrest_age policedatabase_in_records
## 1        1997       32.5                       1.5
## 2        1998       21.0                       2.0
## 3        1999       23.0                       3.0
## 4        2000       21.0                       2.0
## 5        2001       23.0                       2.0
## 6        2002       30.0                       2.0

Requirement5

Rename values

# Requirement 5: replace the Yes/No flags in `employed` and `released` with
# descriptive labels, then store each column as a factor.
# Values other than "Yes"/"No" (including NA) pass through unchanged.
Adultarrests_each100[["employed"]] <- as.factor(
  ifelse(
    as.character(Adultarrests_each100[["employed"]]) == "Yes",
    "Employed",
    ifelse(
      as.character(Adultarrests_each100[["employed"]]) == "No",
      "Unemployed",
      as.character(Adultarrests_each100[["employed"]])
    )
  )
)
Adultarrests_each100[["released"]] <- as.factor(
  ifelse(
    as.character(Adultarrests_each100[["released"]]) == "Yes",
    "Released",
    ifelse(
      as.character(Adultarrests_each100[["released"]]) == "No",
      "Unreleased",
      as.character(Adultarrests_each100[["released"]])
    )
  )
)

Requirement6

# Requirement 6: pick 25 rows at random (without replacement) and print them.
sampleoftable <- Adultarrests_each100[
  sample.int(nrow(Adultarrests_each100), size = 25),
]

print(sampleoftable)
## # A tibble: 25 × 8
## # Groups:   race [2]
##    released race  arrest_year arrest_age sex   employed citizen policedatabase_…
##    <fct>    <chr>       <int>      <int> <chr> <fct>    <chr>              <int>
##  1 Released White        1998         18 Male  Employed Yes                    2
##  2 Released Black        1999         19 Male  Employed Yes                    3
##  3 Released Black        2001         21 Male  Employed Yes                    2
##  4 Unrelea… Black        1999         19 Male  Unemplo… Yes                    4
##  5 Released White        1999         18 Fema… Employed Yes                    2
##  6 Released White        2000         39 Male  Employed Yes                    4
##  7 Released Black        1997         27 Male  Employed Yes                    2
##  8 Released White        2000         22 Male  Unemplo… Yes                    1
##  9 Released Black        2001         28 Male  Unemplo… Yes                    3
## 10 Released Black        1999         24 Male  Employed No                     2
## # … with 15 more rows