The Stop, Question and Frisk Data from NYPD is available at http://www.nyc.gov/html/nypd/html/analysis_and_planning/stop_question_and_frisk_report.shtml

Download the 2014 archive and the specs document, unzip the archive, and let the party begin.

Getting Familiar with Data

# Load the raw 2014 SQF extract from the working directory.
# stringsAsFactors is set explicitly because its default changed from TRUE to
# FALSE in R 4.0.0; the factor-style summary() output shown later in this
# document (e.g. for perstop) is only reproduced when text columns are read
# as factors. The cleaning steps below convert every retained column
# explicitly, so the final cleaned data does not depend on this setting.
dat <- read.csv("2014.csv", stringsAsFactors = TRUE)

First, load the 2014 SQF dataset and take a look at the data. There are 45787 rows and 112 columns.

# Preview the first six records of the raw data.
head(dat, n = 6L)
##   year pct ser_num datestop timestop recstat inout trhsloc perobs
## 1 2014  67      15  1012014     2330       1     O       P      1
## 2 2014   7      26  1032014     1530       1     I       T      2
## 3 2014  84      52  1042014     2100       A     I       P      1
## 4 2014  84      13  1092014     1250       1     I       H      1
## 5 2014  77      19  1092014     1310       1     I       P      1
## 6 2014  44      62  1152014     2220       A     O       P      2
##              crimsusp perstop typeofid explnstp othpers arstmade
## 1                 FEL       5        V        Y       Y        N
## 2   CRIMINAL MISCHIEF       5        P        Y       N        Y
## 3 GRAND LARCENY (FEL)      10        P        Y       Y        N
## 4       CRIM TRESPASS       5        P        Y       Y        N
## 5                 FEL       2        P        Y       N        N
## 6 CPW (GRAVITY KNIFE)       5        V        Y       N        Y
##        arstoffn sumissue sumoffen compyear comppct offunif officrid
## 1                      N                 0       0       N         
## 2     PL 170.25        N                 0       0       N         
## 3                      N                 0       0       N         
## 4                      N                 0       0       Y         
## 5                      N                 0       0       N         
## 6 CPW PL 265.01        N                 0       0       Y         
##   frisked searched contrabn adtlrept pistol riflshot asltweap knifcuti
## 1       Y        N        N        N      N        N        N        N
## 2       Y        Y        N        N      N        N        N        N
## 3       Y        N        N        N      N        N        N        N
## 4       Y        N        N        N      N        N        N        N
## 5       N        N        N        N      N        N        N        N
## 6       Y        Y        N        N      N        N        N        Y
##   machgun othrweap pf_hands pf_wall pf_grnd pf_drwep pf_ptwep pf_baton
## 1       N        N        N       N       N        N        N        N
## 2       N        N        Y       N       N        N        N        N
## 3       N        N        N       N       N        N        N        N
## 4       N        N        N       N       N        N        N        N
## 5       N        N        N       N       N        N        N        N
## 6       N        N        N       N       N        N        N        N
##   pf_hcuff pf_pepsp pf_other radio ac_rept ac_inves rf_vcrim rf_othsw
## 1        N        N        N     N       N        N        N        N
## 2        N        N        N     N       N        N        N        Y
## 3        N        N        N     N       Y        N        N        N
## 4        N        N        N     N       N        Y        N        N
## 5        N        N        N     N       N        N        N        N
## 6        Y        N        N     N       N        N        N        N
##   ac_proxm rf_attir cs_objcs cs_descr cs_casng cs_lkout rf_vcact cs_cloth
## 1        N        N        N        N        N        N        N        N
## 2        N        N        N        N        Y        N        N        N
## 3        N        Y        N        N        N        N        N        N
## 4        Y        N        N        N        Y        Y        N        N
## 5        N        N        N        N        Y        N        N        Y
## 6        Y        N        N        N        N        N        N        N
##   cs_drgtr ac_evasv ac_assoc cs_furtv rf_rfcmp ac_cgdir rf_verbl cs_vcrim
## 1        N        N        N        N        N        N        N        N
## 2        N        N        N        N        N        N        N        N
## 3        N        Y        N        N        N        N        N        N
## 4        N        N        Y        Y        N        N        N        N
## 5        N        N        N        Y        N        Y        N        N
## 6        N        Y        N        Y        N        N        N        N
##   cs_bulge cs_other ac_incid ac_time rf_knowl ac_stsnd ac_other sb_hdobj
## 1        N        Y        N       N        Y        N        N        N
## 2        N        N        N       Y        N        N        N        N
## 3        N        Y        N       N        N        N        N        N
## 4        N        N        Y       Y        Y        N        N        N
## 5        N        N        Y       Y        N        N        N        N
## 6        Y        N        Y       N        N        N        N        Y
##   sb_outln sb_admis sb_other repcmd revcmd rf_furt rf_bulg offverb offshld
## 1        N        N        N    186    186       N       N               S
## 2        N        N        Y    863    863       N       N       V       S
## 3        N        N        N     84     84       Y       N       V       S
## 4        N        N        N     84     84       Y       N                
## 5        N        N        N    187    187       N       N       V       S
## 6        N        N        Y    163    163       Y       Y                
##   forceuse sex race      dob age ht_feet ht_inch weight haircolr eyecolor
## 1            M    B 12311900  18       5       7    150       BK       BR
## 2       OT   M    B 12311900  31       5       7    160       BK       BR
## 3            M    B 12311900  16       5       8    160       BK       BR
## 4            M    A 12311900  19       5       7    150       BK       BR
## 5            M    B 12311900  32       5      10    200       BK       BR
## 6       DS   M    B 12311900  22       5       9    160       BK       BR
##   build othfeatr addrtyp rescode premtype              premname addrnum
## 1     M                L      NA       NA                              
## 2     M                L      NA       NA MEZZANINE (POST 0431)        
## 3     T                L      NA       NA                 HOTEL     224
## 4     Z    SMALL       L      NA       NA                 LOBBY     190
## 5     U                L      NA       NA                STREET        
## 6     M                L      NA       NA                STREET     364
##            stname       stinter           crossst aptnum      city state
## 1                 CHURCH AVENUE    EAST 39 STREET     NA  BROOKLYN    NA
## 2                  ESSEX STREET   DELANCEY STREET     NA MANHATTAN    NA
## 3 DUFFIELD STREET FULTON STREET WILLOUGHBY STREET     NA  BROOKLYN    NA
## 4     YORK STREET   GOLD STREET     BRIDGE STREET     NA  BROOKLYN    NA
## 5                  RALPH AVENUE     BERGEN STREET     NA  BROOKLYN    NA
## 6 EAST 170 STREET   CLAY AVENUE     TELLER AVENUE     NA     BRONX    NA
##   zip addrpct sector beat post  xcoord ycoord dettypCM lineCM detailCM
## 1  NA      67      G   NA   NA 1000633 176542       CM      1       20
## 2  NA       7      B    2   NA  987521 201066       CM      1       23
## 3  NA      84      F   NA   NA  988579 191174       CM      1       45
## 4  NA      84      D   NA   NA  988827 194808       CM      1       31
## 5  NA      77      J    1   NA 1005873 185052       CM      1       46
## 6  NA      44      R   NA   66 1009416 244229       CM      1       20

In order to comprehend the information captured in the csv file, you need to go through the specs document. For example, column “pct” stands for precinct of stop; column “perobs” stands for period of observation in minutes; etc.

Cleansing the data

# Field 1 - year of stop (CCYY): removed.
dat[["year"]] <- NULL

# Field 2 - precinct of stop (from 1 to 123): stored as a categorical.
dat[["pct"]] <- as.factor(dat[["pct"]])

# Field 3 - UF250 serial number: removed.
dat[["ser_num"]] <- NULL

# Field 4 - date of stop: left-pad the number to eight digits, then parse it
# as month, day, four-digit year.
dat[["datestop"]] <- as.Date(
  sprintf("%08d", dat[["datestop"]]),
  format = "%m%d%Y"
)

# Field 5 - time of stop (HH:MM): left-pad to four digits and keep only the
# first two characters as an integer.
dat[["timestop"]] <- as.integer(
  substr(sprintf("%04d", dat[["timestop"]]), start = 1, stop = 2)
)

# Field 6 - record status: removed.
dat[["recstat"]] <- NULL

# Field 7 - was the stop inside or outside: stored as a categorical.
dat[["inout"]] <- as.factor(dat[["inout"]])

# Field 8 - was the location housing or transit authority: categorical.
dat[["trhsloc"]] <- as.factor(dat[["trhsloc"]])

# Field 9 - period of observation (MMM): look at the distribution first.
summary(dat[["perobs"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.000   1.000   2.683   2.000 635.000

Clearly there are some outliers. Let’s treat observation periods longer than 2 hours (120 minutes) as missing by setting them to NA.

# Field 9 - period of observation (MMM): values above 120 become NA, then the
# column is round-tripped through character and stored as integer.
dat[["perobs"]] <- as.integer(as.character(
  replace(dat[["perobs"]], which(dat[["perobs"]] > 120), NA)
))

# Field 10 - crime suspected: removed.
dat[["crimsusp"]] <- NULL

# Field 11 - period of stop (MMM): inspect the raw values before cleaning.
summary(dat[["perstop"]])
##     0     1     2     3     4     5     6     7     8     9    **     0 
##     1   464   919   888   302  4193   173   187   187    78    34     1 
##     1    10   100   105    11   110    12   120    13   139    14    15 
##  1421  8754     1     1   114     1   238    46   138     1    70  2145 
##    16    17    18   180    19     2    20    21    22    23    24    25 
##    81    74    74     5    36  2737  1080    18    42    43    14   202 
##    26    27    28    29     3    30    31    32    33    34    35    36 
##    23    10    18    11  2735   374     2     8     6     9    43    11 
##    37    38    39     4    40    41    42    43    45    46    47    48 
##    11     5     6  1198    77     2     5     2    60     2     2     3 
##    49     5    50    51    52    53    54    55    56    58    59     6 
##     3 14120    27     1     2     7     4     6     1     4     6   581 
##    60    63    65    67    68     7    70    75     8    80    81    85 
##    54     1     3     1     1   707     3     1   646     2     1     1 
##     9    90    92 
##   213     4     1

"**" is not a valid value. Let’s replace it with NA.

# Field 11 - period of stop (MMM): the "**" placeholder becomes NA, then the
# column is converted to integer by way of character.
dat[["perstop"]] <- as.integer(as.character(
  replace(dat[["perstop"]], which(dat[["perstop"]] == "**"), NA)
))

# Field 12 - stopped person's identification type: categorical.
dat[["typeofid"]] <- as.factor(dat[["typeofid"]])

# Fields 13-24 below are either recoded to logical (TRUE where the raw value
# is "Y") or removed.

# Field 13 - did the officer explain the reason for the stop?
dat[["explnstp"]] <- dat[["explnstp"]] == "Y"

# Field 14 - were other persons stopped, questioned or frisked?
dat[["othpers"]] <- dat[["othpers"]] == "Y"

# Field 15 - was an arrest made?
dat[["arstmade"]] <- dat[["arstmade"]] == "Y"

# Field 16 - offense suspect arrested for: removed.
dat[["arstoffn"]] <- NULL

# Field 17 - was a summons issued?
dat[["sumissue"]] <- dat[["sumissue"]] == "Y"

# Field 18 - offense suspect was summonsed for: removed.
dat[["sumoffen"]] <- NULL

# Field 19 - complaint year (if complaint report prepared): removed.
dat[["compyear"]] <- NULL

# Field 20 - complaint precinct (if complaint report prepared): removed.
dat[["comppct"]] <- NULL

# Field 21 - was the officer in uniform?
dat[["offunif"]] <- dat[["offunif"]] == "Y"

# Field 22 - ID card provided by officer (if not in uniform): removed.
dat[["officrid"]] <- NULL

# Field 23 - was the suspect frisked?
dat[["frisked"]] <- dat[["frisked"]] == "Y"

# Field 24 - was the suspect searched?
dat[["searched"]] <- dat[["searched"]] == "Y"

# "Was armed?" group, recoded to logical in one pass:
#   25 contraband found on suspect
#   27 pistol found on suspect
#   28 rifle found on suspect
#   29 assault weapon found on suspect
#   30 knife or cutting instrument found on suspect
#   31 machine gun found on suspect
#   32 another type of weapon found on suspect
binary <- c(
  "contrabn", "pistol", "riflshot", "asltweap",
  "knifcuti", "machgun", "othrweap"
)
for (b in binary) {
  dat[[b]] <- dat[[b]] == "Y"
}

# Field 26 - were additional reports prepared?
dat[["adtlrept"]] <- dat[["adtlrept"]] == "Y"

# The flag groups below are selected by column-name prefix and recoded to
# logical (TRUE where the raw value is "Y"). Each pattern is anchored with "^"
# so that only columns whose name STARTS with the prefix are converted; an
# unanchored pattern would also match the prefix anywhere inside a name.

# PHYSICAL FORCE USED BY OFFICER
  # 33 PHYSICAL FORCE USED BY OFFICER - HANDS
  # 34 PHYSICAL FORCE USED BY OFFICER - SUSPECT AGAINST WALL
  # 35 PHYSICAL FORCE USED BY OFFICER - SUSPECT ON GROUND
  # 36 PHYSICAL FORCE USED BY OFFICER - WEAPON DRAWN
  # 37 PHYSICAL FORCE USED BY OFFICER - WEAPON POINTED
  # 38 PHYSICAL FORCE USED BY OFFICER - BATON
  # 39 PHYSICAL FORCE USED BY OFFICER - HANDCUFFS
  # 40 PHYSICAL FORCE USED BY OFFICER - PEPPER SPRAY
  # 41 PHYSICAL FORCE USED BY OFFICER - OTHER
for (b in grep("^pf_", colnames(dat), value = TRUE)) {
  dat[[b]] <- dat[[b]] == "Y"
}

# 42 RADIO RUN
dat$radio <- dat$radio == "Y"

# ADDITIONAL CIRCUMSTANCES
  # 43 ADDITIONAL CIRCUMSTANCES - REPORT BY VICTIM/WITNESS/OFFICER
  # 44 ADDITIONAL CIRCUMSTANCES - ONGOING INVESTIGATION
  # 47 ADDITIONAL CIRCUMSTANCES - PROXIMITY TO SCENE OF OFFENSE
  # 56 ADDITIONAL CIRCUMSTANCES - EVASIVE RESPONSE TO QUESTIONING
  # 57 ADDITIONAL CIRCUMSTANCES - ASSOCIATING WITH KNOWN CRIMINALS
  # 60 ADDITIONAL CIRCUMSTANCES - CHANGE DIRECTION AT SIGHT OF OFFICER
  # 65 ADDITIONAL CIRCUMSTANCES - AREA HAS HIGH CRIME INCIDENCE
  # 68 ADDITIONAL CIRCUMSTANCES - SIGHTS OR SOUNDS OF CRIMINAL ACTIVITY
  # 69 ADDITIONAL CIRCUMSTANCES - OTHER
  # 66 ADDITIONAL CIRCUMSTANCES - TIME OF DAY FITS CRIME INCIDENCE
for (b in grep("^ac_", colnames(dat), value = TRUE)) {
  dat[[b]] <- dat[[b]] == "Y"
}

# REASON FOR FRISK
  # 45 REASON FOR FRISK - VIOLENT CRIME SUSPECTED
  # 46 REASON FOR FRISK - OTHER SUSPICION OF WEAPONS
  # 48 REASON FOR FRISK - INAPPROPRIATE ATTIRE FOR SEASON
  # 53 REASON FOR FRISK - ACTIONS OF ENGAGING IN A VIOLENT CRIME
  # 59 REASON FOR FRISK - REFUSE TO COMPLY W OFFICER'S DIRECTIONS
  # 61 REASON FOR FRISK - VERBAL THREATS BY SUSPECT
  # 67 REASON FOR FRISK - KNOWLEDGE OF SUSPECT'S PRIOR CRIM BEHAV
  # 76 REASON FOR FRISK - FURTIVE MOVEMENTS
  # 77 REASON FOR FRISK - SUSPICIOUS BULGE
for (b in grep("^rf_", colnames(dat), value = TRUE)) {
  dat[[b]] <- dat[[b]] == "Y"
}

# REASON FOR STOP
  # 49 REASON FOR STOP - CARRYING SUSPICIOUS OBJECT
  # 50 REASON FOR STOP - FITS A RELEVANT DESCRIPTION
  # 51 REASON FOR STOP - CASING A VICTIM OR LOCATION
  # 52 REASON FOR STOP - SUSPECT ACTING AS A LOOKOUT
  # 54 REASON FOR STOP - WEARING CLOTHES COMMONLY USED IN A CRIME
  # 55 REASON FOR STOP - ACTIONS INDICATIVE OF A DRUG TRANSACTION
  # 58 REASON FOR STOP - FURTIVE MOVEMENTS
  # 62 REASON FOR STOP - ACTIONS OF ENGAGING IN A VIOLENT CRIME
  # 63 REASON FOR STOP - SUSPICIOUS BULGE
  # 64 REASON FOR STOP - OTHER
for (b in grep("^cs_", colnames(dat), value = TRUE)) {
  dat[[b]] <- dat[[b]] == "Y"
}

# BASIS OF SEARCH
  # 70 BASIS OF SEARCH - HARD OBJECT
  # 71 BASIS OF SEARCH - OUTLINE OF WEAPON
  # 72 BASIS OF SEARCH - ADMISSION BY SUSPECT
  # 73 BASIS OF SEARCH - OTHER
for (b in grep("^sb_", colnames(dat), value = TRUE)) {
  dat[[b]] <- dat[[b]] == "Y"
}

# Fields 74-75 - reporting and reviewing officer's command (1 to 999):
# both stored as categoricals.
dat[c("repcmd", "revcmd")] <- lapply(dat[c("repcmd", "revcmd")], as.factor)

# Fields 78-79 - verbal statement and shield provided by officer (if not in
# uniform): both stored as categoricals.
dat[c("offverb", "offshld")] <- lapply(dat[c("offverb", "offshld")], as.factor)

# Field 80 - reason force used: categorical.
dat[["forceuse"]] <- as.factor(dat[["forceuse"]])

# Fields 81-82 - suspect's sex and race: categoricals.
dat[c("sex", "race")] <- lapply(dat[c("sex", "race")], as.factor)

# Field 83 - suspect's date of birth (CCYY-MM-DD): removed.
dat[["dob"]] <- NULL

# Field 84 - suspect's age: converted to integer by way of character; entries
# that are not numbers become NA (R reports this with a coercion warning).
dat[["age"]] <- as.integer(as.character(dat[["age"]]))
## Warning: NAs introduced by coercion
# Distribution of the converted age column, before outlier handling.
summary(dat[["age"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   20.00   25.00   28.55   34.00  999.00      54

Clearly there are some outliers. Let’s set ages less than 10 or more than 90 to NA.

# Field 84 - suspect's age: only ages from 10 through 90 are kept; anything
# outside that range is set to NA. Entries that are already NA stay NA.
dat[["age"]] <- replace(
  dat[["age"]],
  which(!(dat[["age"]] >= 10 & dat[["age"]] <= 90)),
  NA
)
summary(dat[["age"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   10.00   20.00   25.00   28.09   34.00   89.00     212
# Fields 85-87 - suspect's height (feet), height (inches) and weight: removed.
dat[["ht_feet"]] <- NULL
dat[["ht_inch"]] <- NULL
dat[["weight"]] <- NULL

# Fields 88-90 - suspect's hair color, eye color and build: categoricals.
dat[c("haircolr", "eyecolor", "build")] <- lapply(
  dat[c("haircolr", "eyecolor", "build")],
  as.factor
)

# Field 91 - suspect's other features (scars, tattoos etc.): removed.
dat[["othfeatr"]] <- NULL

# Location-of-stop detail fields, all removed:
#    92 address type        93 resident code      94 premise type
#    95 premise name        96 address number     97 street name
#    98 intersection        99 cross street      100 apt number
#   102 state              103 zip code          104 address precinct
#   105 sector             106 beat              107 post
binary <- c(
  "addrtyp", "rescode", "premtype", "premname", "addrnum",
  "stname", "stinter", "crossst", "aptnum", "state",
  "zip", "addrpct", "sector", "beat", "post"
)
for (b in binary) {
  dat[[b]] <- NULL
}

# Field 101 - location of stop city: the one location text field that is
# kept, stored as a categorical.
dat[["city"]] <- as.factor(dat[["city"]])

# 108 LOCATION OF STOP X COORD
# Convert by way of character: as.integer() applied directly to a factor
# returns the internal level codes rather than the printed values, which would
# silently corrupt the coordinates if the column were ever read as a factor.
# This is the same conversion already used for perobs, perstop and age.
dat$xcoord <- as.integer(as.character(dat$xcoord))

# 109 LOCATION OF STOP Y COORD
dat$ycoord <- as.integer(as.character(dat$ycoord))

# Field 110 - details types code: removed.
dat[["dettypCM"]] <- NULL

# Field 111 - lineCM: removed.
dat[["lineCM"]] <- NULL

# Field 112 - crime code description: stored as a categorical.
dat[["detailCM"]] <- as.factor(dat[["detailCM"]])

After data cleansing, check the dimensions of the new dataset.

# Row count followed by column count of the cleaned data.
c(nrow(dat), ncol(dat))
## [1] 45787    81

Let’s revisit the dataset.

# Preview the first six records of the cleaned data.
head(dat, n = 6L)
##   pct   datestop timestop inout trhsloc perobs perstop typeofid explnstp
## 1  67 2014-01-01       23     O       P      1       5        V     TRUE
## 2   7 2014-01-03       15     I       T      2       5        P     TRUE
## 3  84 2014-01-04       21     I       P      1      10        P     TRUE
## 4  84 2014-01-09       12     I       H      1       5        P     TRUE
## 5  77 2014-01-09       13     I       P      1       2        P     TRUE
## 6  44 2014-01-15       22     O       P      2       5        V     TRUE
##   othpers arstmade sumissue offunif frisked searched contrabn adtlrept
## 1    TRUE    FALSE    FALSE   FALSE    TRUE    FALSE    FALSE    FALSE
## 2   FALSE     TRUE    FALSE   FALSE    TRUE     TRUE    FALSE    FALSE
## 3    TRUE    FALSE    FALSE   FALSE    TRUE    FALSE    FALSE    FALSE
## 4    TRUE    FALSE    FALSE    TRUE    TRUE    FALSE    FALSE    FALSE
## 5   FALSE    FALSE    FALSE   FALSE   FALSE    FALSE    FALSE    FALSE
## 6   FALSE     TRUE    FALSE    TRUE    TRUE     TRUE    FALSE    FALSE
##   pistol riflshot asltweap knifcuti machgun othrweap pf_hands pf_wall
## 1  FALSE    FALSE    FALSE    FALSE   FALSE    FALSE    FALSE   FALSE
## 2  FALSE    FALSE    FALSE    FALSE   FALSE    FALSE     TRUE   FALSE
## 3  FALSE    FALSE    FALSE    FALSE   FALSE    FALSE    FALSE   FALSE
## 4  FALSE    FALSE    FALSE    FALSE   FALSE    FALSE    FALSE   FALSE
## 5  FALSE    FALSE    FALSE    FALSE   FALSE    FALSE    FALSE   FALSE
## 6  FALSE    FALSE    FALSE     TRUE   FALSE    FALSE    FALSE   FALSE
##   pf_grnd pf_drwep pf_ptwep pf_baton pf_hcuff pf_pepsp pf_other radio
## 1   FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE FALSE
## 2   FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE FALSE
## 3   FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE FALSE
## 4   FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE FALSE
## 5   FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE FALSE
## 6   FALSE    FALSE    FALSE    FALSE     TRUE    FALSE    FALSE FALSE
##   ac_rept ac_inves rf_vcrim rf_othsw ac_proxm rf_attir cs_objcs cs_descr
## 1   FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE
## 2   FALSE    FALSE    FALSE     TRUE    FALSE    FALSE    FALSE    FALSE
## 3    TRUE    FALSE    FALSE    FALSE    FALSE     TRUE    FALSE    FALSE
## 4   FALSE     TRUE    FALSE    FALSE     TRUE    FALSE    FALSE    FALSE
## 5   FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE
## 6   FALSE    FALSE    FALSE    FALSE     TRUE    FALSE    FALSE    FALSE
##   cs_casng cs_lkout rf_vcact cs_cloth cs_drgtr ac_evasv ac_assoc cs_furtv
## 1    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE
## 2     TRUE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE
## 3    FALSE    FALSE    FALSE    FALSE    FALSE     TRUE    FALSE    FALSE
## 4     TRUE     TRUE    FALSE    FALSE    FALSE    FALSE     TRUE     TRUE
## 5     TRUE    FALSE    FALSE     TRUE    FALSE    FALSE    FALSE     TRUE
## 6    FALSE    FALSE    FALSE    FALSE    FALSE     TRUE    FALSE     TRUE
##   rf_rfcmp ac_cgdir rf_verbl cs_vcrim cs_bulge cs_other ac_incid ac_time
## 1    FALSE    FALSE    FALSE    FALSE    FALSE     TRUE    FALSE   FALSE
## 2    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    TRUE
## 3    FALSE    FALSE    FALSE    FALSE    FALSE     TRUE    FALSE   FALSE
## 4    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE     TRUE    TRUE
## 5    FALSE     TRUE    FALSE    FALSE    FALSE    FALSE     TRUE    TRUE
## 6    FALSE    FALSE    FALSE    FALSE     TRUE    FALSE     TRUE   FALSE
##   rf_knowl ac_stsnd ac_other sb_hdobj sb_outln sb_admis sb_other repcmd
## 1     TRUE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    186
## 2    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE     TRUE    863
## 3    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE     84
## 4     TRUE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE     84
## 5    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    FALSE    187
## 6    FALSE    FALSE    FALSE     TRUE    FALSE    FALSE     TRUE    163
##   revcmd rf_furt rf_bulg offverb offshld forceuse sex race age haircolr
## 1    186   FALSE   FALSE               S            M    B  18       BK
## 2    863   FALSE   FALSE       V       S       OT   M    B  31       BK
## 3     84    TRUE   FALSE       V       S            M    B  16       BK
## 4     84    TRUE   FALSE                            M    A  19       BK
## 5    187   FALSE   FALSE       V       S            M    B  32       BK
## 6    163    TRUE    TRUE                       DS   M    B  22       BK
##   eyecolor build      city  xcoord ycoord detailCM
## 1       BR     M  BROOKLYN 1000633 176542       20
## 2       BR     M MANHATTAN  987521 201066       23
## 3       BR     T  BROOKLYN  988579 191174       45
## 4       BR     Z  BROOKLYN  988827 194808       31
## 5       BR     U  BROOKLYN 1005873 185052       46
## 6       BR     M     BRONX 1009416 244229       20

Now we save the cleansed dataset for future analysis.

# Write the cleaned data frame, under the object name "dat", to an .rda file
# in the working directory.
save(list = "dat", file = "SQF_clean.rda")