The Stop, Question and Frisk Data from NYPD is available at http://www.nyc.gov/html/nypd/html/analysis_and_planning/stop_question_and_frisk_report.shtml
Download the 2014 archive and specs, unzip, and let the party begin.
# Load the raw 2014 export from the working directory.
# stringsAsFactors is spelled out because its default changed to FALSE in
# R 4.0.0; the factor-style summary() printed for perstop further down only
# reproduces when text columns are read as factors.
dat <- read.csv("2014.csv", stringsAsFactors = TRUE)
First, load the 2014 SQF dataset and take a look at the data. There are 45787 rows and 112 columns.
# Print the first six raw records.
head(dat, n = 6L)
## year pct ser_num datestop timestop recstat inout trhsloc perobs
## 1 2014 67 15 1012014 2330 1 O P 1
## 2 2014 7 26 1032014 1530 1 I T 2
## 3 2014 84 52 1042014 2100 A I P 1
## 4 2014 84 13 1092014 1250 1 I H 1
## 5 2014 77 19 1092014 1310 1 I P 1
## 6 2014 44 62 1152014 2220 A O P 2
## crimsusp perstop typeofid explnstp othpers arstmade
## 1 FEL 5 V Y Y N
## 2 CRIMINAL MISCHIEF 5 P Y N Y
## 3 GRAND LARCENY (FEL) 10 P Y Y N
## 4 CRIM TRESPASS 5 P Y Y N
## 5 FEL 2 P Y N N
## 6 CPW (GRAVITY KNIFE) 5 V Y N Y
## arstoffn sumissue sumoffen compyear comppct offunif officrid
## 1 N 0 0 N
## 2 PL 170.25 N 0 0 N
## 3 N 0 0 N
## 4 N 0 0 Y
## 5 N 0 0 N
## 6 CPW PL 265.01 N 0 0 Y
## frisked searched contrabn adtlrept pistol riflshot asltweap knifcuti
## 1 Y N N N N N N N
## 2 Y Y N N N N N N
## 3 Y N N N N N N N
## 4 Y N N N N N N N
## 5 N N N N N N N N
## 6 Y Y N N N N N Y
## machgun othrweap pf_hands pf_wall pf_grnd pf_drwep pf_ptwep pf_baton
## 1 N N N N N N N N
## 2 N N Y N N N N N
## 3 N N N N N N N N
## 4 N N N N N N N N
## 5 N N N N N N N N
## 6 N N N N N N N N
## pf_hcuff pf_pepsp pf_other radio ac_rept ac_inves rf_vcrim rf_othsw
## 1 N N N N N N N N
## 2 N N N N N N N Y
## 3 N N N N Y N N N
## 4 N N N N N Y N N
## 5 N N N N N N N N
## 6 Y N N N N N N N
## ac_proxm rf_attir cs_objcs cs_descr cs_casng cs_lkout rf_vcact cs_cloth
## 1 N N N N N N N N
## 2 N N N N Y N N N
## 3 N Y N N N N N N
## 4 Y N N N Y Y N N
## 5 N N N N Y N N Y
## 6 Y N N N N N N N
## cs_drgtr ac_evasv ac_assoc cs_furtv rf_rfcmp ac_cgdir rf_verbl cs_vcrim
## 1 N N N N N N N N
## 2 N N N N N N N N
## 3 N Y N N N N N N
## 4 N N Y Y N N N N
## 5 N N N Y N Y N N
## 6 N Y N Y N N N N
## cs_bulge cs_other ac_incid ac_time rf_knowl ac_stsnd ac_other sb_hdobj
## 1 N Y N N Y N N N
## 2 N N N Y N N N N
## 3 N Y N N N N N N
## 4 N N Y Y Y N N N
## 5 N N Y Y N N N N
## 6 Y N Y N N N N Y
## sb_outln sb_admis sb_other repcmd revcmd rf_furt rf_bulg offverb offshld
## 1 N N N 186 186 N N S
## 2 N N Y 863 863 N N V S
## 3 N N N 84 84 Y N V S
## 4 N N N 84 84 Y N
## 5 N N N 187 187 N N V S
## 6 N N Y 163 163 Y Y
## forceuse sex race dob age ht_feet ht_inch weight haircolr eyecolor
## 1 M B 12311900 18 5 7 150 BK BR
## 2 OT M B 12311900 31 5 7 160 BK BR
## 3 M B 12311900 16 5 8 160 BK BR
## 4 M A 12311900 19 5 7 150 BK BR
## 5 M B 12311900 32 5 10 200 BK BR
## 6 DS M B 12311900 22 5 9 160 BK BR
## build othfeatr addrtyp rescode premtype premname addrnum
## 1 M L NA NA
## 2 M L NA NA MEZZANINE (POST 0431)
## 3 T L NA NA HOTEL 224
## 4 Z SMALL L NA NA LOBBY 190
## 5 U L NA NA STREET
## 6 M L NA NA STREET 364
## stname stinter crossst aptnum city state
## 1 CHURCH AVENUE EAST 39 STREET NA BROOKLYN NA
## 2 ESSEX STREET DELANCEY STREET NA MANHATTAN NA
## 3 DUFFIELD STREET FULTON STREET WILLOUGHBY STREET NA BROOKLYN NA
## 4 YORK STREET GOLD STREET BRIDGE STREET NA BROOKLYN NA
## 5 RALPH AVENUE BERGEN STREET NA BROOKLYN NA
## 6 EAST 170 STREET CLAY AVENUE TELLER AVENUE NA BRONX NA
## zip addrpct sector beat post xcoord ycoord dettypCM lineCM detailCM
## 1 NA 67 G NA NA 1000633 176542 CM 1 20
## 2 NA 7 B 2 NA 987521 201066 CM 1 23
## 3 NA 84 F NA NA 988579 191174 CM 1 45
## 4 NA 84 D NA NA 988827 194808 CM 1 31
## 5 NA 77 J 1 NA 1005873 185052 CM 1 46
## 6 NA 44 R NA 66 1009416 244229 CM 1 20
In order to comprehend the information captured in the csv file, you need to go through the specs document. For example, column “pct” stands for precinct of stop; column “perobs” stands for period of observation in minutes; etc.
# Columns that are not kept:
#   1 YEAR OF STOP (CCYY)
#   3 UF250 SERIAL NUMBER
#   6 RECORD STATUS
dat <- dat[!(names(dat) %in% c("year", "ser_num", "recstat"))]

# Coded columns stored as factors:
#   2 PRECINCT OF STOP (FROM 1 TO 123)
#   7 WAS STOP INSIDE OR OUTSIDE ?
#   8 WAS LOCATION HOUSING OR TRANSIT AUTHORITY ?
for (coded in c("pct", "inout", "trhsloc")) {
  dat[[coded]] <- as.factor(dat[[coded]])
}

# 4 DATE OF STOP (MM-DD-YYYY)
# Left-pad the numeric date with zeros to eight digits (1012014 becomes
# "01012014") so it parses as month, day, four-digit year.
dat[["datestop"]] <- as.Date(
  sprintf("%08d", dat[["datestop"]]),
  format = "%m%d%Y"
)

# 5 TIME OF STOP (HH:MM)
# Left-pad to four digits and keep the first two, i.e. only the hour.
dat[["timestop"]] <- as.integer(
  substr(sprintf("%04d", dat[["timestop"]]), start = 1, stop = 2)
)

# 9 PERIOD OF OBSERVATION (MMM) -- look at the distribution before cleaning
summary(dat[["perobs"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 1.000 2.683 2.000 635.000
Clearly there are some outliers. Let’s set observation periods longer than 2 hours (120 minutes) to NA.
# 9 PERIOD OF OBSERVATION (MMM)
# Values above 120 are blanked to NA; the column is then re-read as integer.
dat[["perobs"]] <- replace(
  dat[["perobs"]],
  which(dat[["perobs"]] > 120),
  NA
)
dat[["perobs"]] <- as.integer(as.character(dat[["perobs"]]))

# 10 CRIME SUSPECTED -- not kept
dat[["crimsusp"]] <- NULL

# 11 PERIOD OF STOP (MMM) -- look at the raw values before cleaning
summary(dat[["perstop"]])
## 0 1 2 3 4 5 6 7 8 9 ** 0
## 1 464 919 888 302 4193 173 187 187 78 34 1
## 1 10 100 105 11 110 12 120 13 139 14 15
## 1421 8754 1 1 114 1 238 46 138 1 70 2145
## 16 17 18 180 19 2 20 21 22 23 24 25
## 81 74 74 5 36 2737 1080 18 42 43 14 202
## 26 27 28 29 3 30 31 32 33 34 35 36
## 23 10 18 11 2735 374 2 8 6 9 43 11
## 37 38 39 4 40 41 42 43 45 46 47 48
## 11 5 6 1198 77 2 5 2 60 2 2 3
## 49 5 50 51 52 53 54 55 56 58 59 6
## 3 14120 27 1 2 7 4 6 1 4 6 581
## 60 63 65 67 68 7 70 75 8 80 81 85
## 54 1 3 1 1 707 3 1 646 2 1 1
## 9 90 92
## 213 4 1
“**” is not a valid input. Let’s replace it with NA.
# 11 PERIOD OF STOP (MMM)
# Blank out the "**" placeholder, then parse what is left as integers
# (going through as.character() so labels, not factor codes, are parsed).
dat[["perstop"]] <- replace(
  dat[["perstop"]],
  which(dat[["perstop"]] == "**"),
  NA
)
dat[["perstop"]] <- as.integer(as.character(dat[["perstop"]]))

# 12 STOPPED PERSON'S IDENTIFICATION TYPE
dat[["typeofid"]] <- as.factor(dat[["typeofid"]])

# Yes/no answers become logical (TRUE only where the raw value is "Y"):
#   13 DID OFFICER EXPLAIN REASON FOR STOP ?
#   14 WERE OTHER PERSONS STOPPED, QUESTIONED OR FRISKED ?
#   15 WAS AN ARREST MADE ?
#   17 WAS A SUMMONS ISSUED ?
#   21 WAS OFFICER IN UNIFORM ?
#   23 WAS SUSPECT FRISKED ?
#   24 WAS SUSPECT SEARCHED ?
yes_no_cols <- c(
  "explnstp", "othpers", "arstmade", "sumissue",
  "offunif", "frisked", "searched"
)
dat[yes_no_cols] <- lapply(dat[yes_no_cols], function(answer) answer == "Y")

# Columns that are not kept:
#   16 OFFENSE SUSPECT ARRESTED FOR
#   18 OFFENSE SUSPECT WAS SUMMONSED FOR
#   19 COMPLAINT YEAR (IF COMPLAINT REPORT PREPARED)
#   20 COMPLAINT PRECINCT (IF COMPLAINT REPORT PREPARED)
#   22 ID CARD PROVIDED BY OFFICER (IF NOT IN UNIFORM)
unused_cols <- c("arstoffn", "sumoffen", "compyear", "comppct", "officrid")
dat <- dat[!(names(dat) %in% unused_cols)]
# WAS ARMED?
# "Found on suspect" answers become logical (TRUE only where the raw value
# is "Y"):
#   25 WAS CONTRABAND FOUND ON SUSPECT ?
#   27 WAS A PISTOL FOUND ON SUSPECT ?
#   28 WAS A RIFLE FOUND ON SUSPECT ?
#   29 WAS AN ASSAULT WEAPON FOUND ON SUSPECT ?
#   30 WAS A KNIFE OR CUTTING INSTRUMENT FOUND ON SUSPECT ?
#   31 WAS A MACHINE GUN FOUND ON SUSPECT ?
#   32 WAS ANOTHER TYPE OF WEAPON FOUND ON SUSPECT
found_cols <- c(
  "contrabn", "pistol", "riflshot", "asltweap",
  "knifcuti", "machgun", "othrweap"
)
dat[found_cols] <- lapply(dat[found_cols], function(answer) answer == "Y")

# 26 WERE ADDITIONAL REPORTS PREPARED ?
dat[["adtlrept"]] <- dat[["adtlrept"]] == "Y"
# Checkbox groups: every column in a group shares a name prefix. Each group
# is selected with a pattern anchored at the start of the column name
# ("^pf_" rather than "pf_"), so a column that only contains the fragment
# somewhere else in its name is never recoded by accident. Selected columns
# become logical: TRUE only where the raw value is "Y".
#
# pf_ : PHYSICAL FORCE USED BY OFFICER
#   33 PHYSICAL FORCE USED BY OFFICER - HANDS
#   34 PHYSICAL FORCE USED BY OFFICER - SUSPECT AGAINST WALL
#   35 PHYSICAL FORCE USED BY OFFICER - SUSPECT ON GROUND
#   36 PHYSICAL FORCE USED BY OFFICER - WEAPON DRAWN
#   37 PHYSICAL FORCE USED BY OFFICER - WEAPON POINTED
#   38 PHYSICAL FORCE USED BY OFFICER - BATON
#   39 PHYSICAL FORCE USED BY OFFICER - HANDCUFFS
#   40 PHYSICAL FORCE USED BY OFFICER - PEPPER SPRAY
#   41 PHYSICAL FORCE USED BY OFFICER - OTHER
#
# ac_ : ADDITIONAL CIRCUMSTANCES
#   43 ADDITIONAL CIRCUMSTANCES - REPORT BY VICTIM/WITNESS/OFFICER
#   44 ADDITIONAL CIRCUMSTANCES - ONGOING INVESTIGATION
#   47 ADDITIONAL CIRCUMSTANCES - PROXIMITY TO SCENE OF OFFENSE
#   56 ADDITIONAL CIRCUMSTANCES - EVASIVE RESPONSE TO QUESTIONING
#   57 ADDITIONAL CIRCUMSTANCES - ASSOCIATING WITH KNOWN CRIMINALS
#   60 ADDITIONAL CIRCUMSTANCES - CHANGE DIRECTION AT SIGHT OF OFFICER
#   65 ADDITIONAL CIRCUMSTANCES - AREA HAS HIGH CRIME INCIDENCE
#   66 ADDITIONAL CIRCUMSTANCES - TIME OF DAY FITS CRIME INCIDENCE
#   68 ADDITIONAL CIRCUMSTANCES - SIGHTS OR SOUNDS OF CRIMINAL ACTIVITY
#   69 ADDITIONAL CIRCUMSTANCES - OTHER
#
# rf_ : REASON FOR FRISK
#   45 REASON FOR FRISK - VIOLENT CRIME SUSPECTED
#   46 REASON FOR FRISK - OTHER SUSPICION OF WEAPONS
#   48 REASON FOR FRISK - INAPPROPRIATE ATTIRE FOR SEASON
#   53 REASON FOR FRISK- ACTIONS OF ENGAGING IN A VIOLENT CRIME
#   59 REASON FOR FRISK - REFUSE TO COMPLY W OFFICER'S DIRECTIONS
#   61 REASON FOR FRISK - VERBAL THREATS BY SUSPECT
#   67 REASON FOR FRISK - KNOWLEDGE OF SUSPECT'S PRIOR CRIM BEHAV
#   76 REASON FOR FRISK - FURTIVE MOVEMENTS
#   77 REASON FOR FRISK - SUSPICIOUS BULGE
#
# cs_ : REASON FOR STOP
#   49 REASON FOR STOP - CARRYING SUSPICIOUS OBJECT
#   50 REASON FOR STOP - FITS A RELEVANT DESCRIPTION
#   51 REASON FOR STOP - CASING A VICTIM OR LOCATION
#   52 REASON FOR STOP - SUSPECT ACTING AS A LOOKOUT
#   54 REASON FOR STOP - WEARING CLOTHES COMMONLY USED IN A CRIME
#   55 REASON FOR STOP - ACTIONS INDICATIVE OF A DRUG TRANSACTION
#   58 REASON FOR STOP - FURTIVE MOVEMENTS
#   62 REASON FOR STOP - ACTIONS OF ENGAGING IN A VIOLENT CRIME
#   63 REASON FOR STOP - SUSPICIOUS BULGE
#   64 REASON FOR STOP - OTHER
#
# sb_ : BASIS OF SEARCH
#   70 BASIS OF SEARCH - HARD OBJECT
#   71 BASIS OF SEARCH - OUTLINE OF WEAPON
#   72 BASIS OF SEARCH - ADMISSION BY SUSPECT
#   73 BASIS OF SEARCH - OTHER
for (prefix in c("pf_", "ac_", "rf_", "cs_", "sb_")) {
  for (b in grep(paste0("^", prefix), colnames(dat), value = TRUE)) {
    dat[[b]] <- dat[[b]] == "Y"
  }
}

# 42 RADIO RUN
dat$radio <- dat$radio == "Y"
# Coded columns stored as factors:
#   74 REPORTING OFFICER'S COMMAND (1 TO 999)
#   75 REVIEWING OFFICER'S COMMAND (1 TO 999)
#   78 VERBAL STATEMENT PROVIDED BY OFFICER (IF NOT IN UNIFORM)
#   79 SHIELD PROVIDED BY OFFICER (IF NOT IN UNIFORM)
#   80 REASON FORCE USED
#   81 SUSPECT'S SEX
#   82 SUSPECT'S RACE
coded_cols <- c(
  "repcmd", "revcmd", "offverb", "offshld", "forceuse", "sex", "race"
)
dat[coded_cols] <- lapply(dat[coded_cols], as.factor)

# 83 SUSPECT'S DATE OF BIRTH (CCYY-MM-DD) -- not kept
dat[["dob"]] <- NULL

# 84 SUSPECT'S AGE
# Parse the labels as integers; entries that are not numbers turn into NA,
# which is what triggers the coercion warning echoed below.
dat[["age"]] <- as.integer(as.character(dat[["age"]]))
## Warning: NAs introduced by coercion
summary(dat[["age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 20.00 25.00 28.55 34.00 999.00 54
Clearly there are some outliers. Let’s set ages less than 10 or more than 90 to NA.
# 84 SUSPECT'S AGE
# Ages under 10 or over 90 are blanked to NA; ages that are already NA
# are left as they are.
implausible_age <- which(dat[["age"]] < 10 | dat[["age"]] > 90)
dat[["age"]][implausible_age] <- NA
summary(dat[["age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 10.00 20.00 25.00 28.09 34.00 89.00 212
# Columns that are not kept:
#   85 SUSPECT'S HEIGHT (FEET)
#   86 SUSPECT'S HEIGHT (INCHES)
#   87 SUSPECT'S WEIGHT
#   91 SUSPECT'S OTHER FEATURES (SCARS, TATOOS ETC.)
dat <- dat[!(names(dat) %in% c("ht_feet", "ht_inch", "weight", "othfeatr"))]

# Coded appearance columns stored as factors:
#   88 SUSPECT'S HAIRCOLOR
#   89 SUSPECT'S EYE COLOR
#   90 SUSPECT'S BUILD
for (appearance in c("haircolr", "eyecolor", "build")) {
  dat[[appearance]] <- as.factor(dat[[appearance]])
}
# Street-level location detail is not kept. Columns removed:
#    92 LOCATION OF STOP ADDRESS TYPE
#    93 LOCATION OF STOP RESIDENT CODE
#    94 LOCATION OF STOP PREMISE TYPE
#    95 LOCATION OF STOP PREMISE NAME
#    96 LOCATION OF STOP ADDRESS NUMBER
#    97 LOCATION OF STOP STREET NAME
#    98 LOCATION OF STOP INTERSECTION
#    99 LOCATION OF STOP CROSS STREET
#   100 LOCATION OF STOP APT NUMBER
#   102 LOCATION OF STOP STATE
#   103 LOCATION OF STOP ZIP CODE
#   104 LOCATION OF STOP ADDRESS PRECINCT
#   105 LOCATION OF STOP SECTOR
#   106 LOCATION OF STOP BEAT
#   107 LOCATION OF STOP POST
location_cols <- c(
  "addrtyp", "rescode", "premtype", "premname", "addrnum",
  "stname", "stinter", "crossst", "aptnum", "state",
  "zip", "addrpct", "sector", "beat", "post"
)
dat <- dat[!(names(dat) %in% location_cols)]
# 101 LOCATION OF STOP CITY
dat$city <- as.factor(dat$city)
# 108 LOCATION OF STOP X COORD
# Convert via as.character(), as done for perobs, perstop and age: calling
# as.integer() directly on a factor returns its internal level codes instead
# of the numbers shown as labels. For a column that is already numeric the
# result is unchanged.
dat$xcoord <- as.integer(as.character(dat$xcoord))
# 109 LOCATION OF STOP Y COORD
dat$ycoord <- as.integer(as.character(dat$ycoord))
# 110 DETAILS TYPES CODE -- not kept
dat$dettypCM <- NULL
# 111 lineCM -- not kept
dat$lineCM <- NULL
# 112 CRIME CODE DESCRIPTION -- numeric code stored as a factor
dat$detailCM <- as.factor(dat$detailCM)
After data cleansing, check the dimensions of the new dataset.
# Row count followed by column count of the cleaned data.
c(nrow(dat), ncol(dat))
## [1] 45787 81
Let’s revisit the dataset.
# Print the first six cleaned records.
head(dat, n = 6L)
## pct datestop timestop inout trhsloc perobs perstop typeofid explnstp
## 1 67 2014-01-01 23 O P 1 5 V TRUE
## 2 7 2014-01-03 15 I T 2 5 P TRUE
## 3 84 2014-01-04 21 I P 1 10 P TRUE
## 4 84 2014-01-09 12 I H 1 5 P TRUE
## 5 77 2014-01-09 13 I P 1 2 P TRUE
## 6 44 2014-01-15 22 O P 2 5 V TRUE
## othpers arstmade sumissue offunif frisked searched contrabn adtlrept
## 1 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 2 FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE
## 3 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 4 TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE
## 5 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 6 FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE
## pistol riflshot asltweap knifcuti machgun othrweap pf_hands pf_wall
## 1 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## 3 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 4 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 5 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 6 FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## pf_grnd pf_drwep pf_ptwep pf_baton pf_hcuff pf_pepsp pf_other radio
## 1 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 3 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 4 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 5 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 6 FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## ac_rept ac_inves rf_vcrim rf_othsw ac_proxm rf_attir cs_objcs cs_descr
## 1 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## 3 TRUE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 4 FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE
## 5 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 6 FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## cs_casng cs_lkout rf_vcact cs_cloth cs_drgtr ac_evasv ac_assoc cs_furtv
## 1 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 2 TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 3 FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 4 TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE
## 5 TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE
## 6 FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE
## rf_rfcmp ac_cgdir rf_verbl cs_vcrim cs_bulge cs_other ac_incid ac_time
## 1 FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## 3 FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
## 4 FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE
## 5 FALSE TRUE FALSE FALSE FALSE FALSE TRUE TRUE
## 6 FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE
## rf_knowl ac_stsnd ac_other sb_hdobj sb_outln sb_admis sb_other repcmd
## 1 TRUE FALSE FALSE FALSE FALSE FALSE FALSE 186
## 2 FALSE FALSE FALSE FALSE FALSE FALSE TRUE 863
## 3 FALSE FALSE FALSE FALSE FALSE FALSE FALSE 84
## 4 TRUE FALSE FALSE FALSE FALSE FALSE FALSE 84
## 5 FALSE FALSE FALSE FALSE FALSE FALSE FALSE 187
## 6 FALSE FALSE FALSE TRUE FALSE FALSE TRUE 163
## revcmd rf_furt rf_bulg offverb offshld forceuse sex race age haircolr
## 1 186 FALSE FALSE S M B 18 BK
## 2 863 FALSE FALSE V S OT M B 31 BK
## 3 84 TRUE FALSE V S M B 16 BK
## 4 84 TRUE FALSE M A 19 BK
## 5 187 FALSE FALSE V S M B 32 BK
## 6 163 TRUE TRUE DS M B 22 BK
## eyecolor build city xcoord ycoord detailCM
## 1 BR M BROOKLYN 1000633 176542 20
## 2 BR M MANHATTAN 987521 201066 23
## 3 BR T BROOKLYN 988579 191174 45
## 4 BR Z BROOKLYN 988827 194808 31
## 5 BR U BROOKLYN 1005873 185052 46
## 6 BR M BRONX 1009416 244229 20
Now we save the cleansed dataset for future analysis.
# Write only the object `dat` to SQF_clean.rda in the working directory;
# load("SQF_clean.rda") restores it under the same name.
save(list = "dat", file = "SQF_clean.rda")