Loading Packages

We load the packages needed for reading, manipulating, and visualizing the data, and finally for modeling.

library(dplyr)      # used for data manipulation and joining data
library(ggplot2)    # used for ploting 


crime <- read.csv(file = "crimedata.csv", header = TRUE)  # load the raw crime data (first row holds the column names)

Let's look at an overview of the dataset.

dplyr::glimpse(crime)  # one line per column: name, type and the first few values
## Observations: 2,215
## Variables: 147
## $ communityname         <fct> BerkeleyHeightstownship, Marpletownship,...
## $ state                 <fct> NJ, PA, OR, NY, MN, MO, MA, IN, ND, TX, ...
## $ countyCode            <fct> 39, 45, ?, 35, 7, ?, 21, ?, 17, ?, ?, ?,...
## $ communityCode         <fct> 5320, 47616, ?, 29443, 5068, ?, 50250, ?...
## $ fold                  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ population            <int> 11980, 23123, 29344, 16656, 11245, 14049...
## $ householdsize         <dbl> 3.10, 2.82, 2.43, 2.40, 2.76, 2.45, 2.60...
## $ racepctblack          <dbl> 1.37, 0.80, 0.74, 1.70, 0.53, 2.51, 1.60...
## $ racePctWhite          <dbl> 91.78, 95.57, 94.33, 97.35, 89.16, 95.65...
## $ racePctAsian          <dbl> 6.50, 3.44, 3.43, 0.50, 1.17, 0.90, 1.47...
## $ racePctHisp           <dbl> 1.88, 0.85, 2.35, 0.70, 0.52, 0.95, 1.10...
## $ agePct12t21           <dbl> 12.47, 11.01, 11.36, 12.55, 24.46, 18.09...
## $ agePct12t29           <dbl> 21.44, 21.30, 25.88, 25.20, 40.53, 32.89...
## $ agePct16t24           <dbl> 10.93, 10.48, 11.01, 12.19, 28.69, 20.04...
## $ agePct65up            <dbl> 11.33, 17.18, 10.28, 17.57, 12.65, 13.26...
## $ numbUrban             <int> 11980, 23123, 29344, 0, 0, 140494, 28700...
## $ pctUrban              <dbl> 100.00, 100.00, 100.00, 0.00, 0.00, 100....
## $ medIncome             <int> 75122, 47917, 35669, 20580, 17390, 21577...
## $ pctWWage              <dbl> 89.24, 78.99, 82.00, 68.15, 69.33, 75.78...
## $ pctWFarmSelf          <dbl> 1.55, 1.11, 1.15, 0.24, 0.55, 1.00, 0.39...
## $ pctWInvInc            <dbl> 70.20, 64.11, 55.73, 38.95, 42.82, 41.15...
## $ pctWSocSec            <dbl> 23.62, 35.50, 22.25, 39.48, 32.16, 29.31...
## $ pctWPubAsst           <dbl> 1.03, 2.75, 2.94, 11.71, 11.21, 7.12, 5....
## $ pctWRetire            <dbl> 18.39, 22.85, 14.56, 18.33, 14.43, 14.09...
## $ medFamInc             <int> 79584, 55323, 42112, 26501, 24018, 27705...
## $ perCapInc             <int> 29711, 20148, 16946, 10810, 8483, 11878,...
## $ whitePerCap           <int> 30233, 20191, 17103, 10909, 9009, 12029,...
## $ blackPerCap           <int> 13600, 18137, 16644, 9984, 887, 7382, 17...
## $ indianPerCap          <int> 5725, 0, 21606, 4941, 4425, 10264, 21482...
## $ AsianPerCap           <int> 27101, 20074, 15528, 3541, 3352, 10753, ...
## $ OtherPerCap           <fct> 5115, 5250, 5954, 2451, 3000, 7192, 2185...
## $ HispPerCap            <int> 22838, 12222, 8405, 4391, 1328, 8104, 22...
## $ NumUnderPov           <int> 227, 885, 1389, 2831, 2855, 23223, 1126,...
## $ PctPopUnderPov        <dbl> 1.96, 3.98, 4.75, 17.23, 29.99, 17.78, 4...
## $ PctLess9thGrade       <dbl> 5.81, 5.61, 2.80, 11.05, 12.15, 8.76, 4....
## $ PctNotHSGrad          <dbl> 9.90, 13.72, 9.09, 33.68, 23.06, 23.03, ...
## $ PctBSorMore           <dbl> 48.18, 29.89, 30.13, 10.81, 25.28, 20.66...
## $ PctUnemployed         <dbl> 2.70, 2.43, 4.01, 9.86, 9.08, 5.72, 4.85...
## $ PctEmploy             <dbl> 64.55, 61.96, 69.80, 54.74, 52.44, 59.02...
## $ PctEmplManu           <dbl> 14.65, 12.26, 15.95, 31.22, 6.89, 14.31,...
## $ PctEmplProfServ       <dbl> 28.82, 29.28, 21.52, 27.43, 36.54, 26.83...
## $ PctOccupManu          <dbl> 5.49, 6.39, 8.79, 26.76, 10.94, 14.72, 8...
## $ PctOccupMgmtProf      <dbl> 50.73, 37.64, 32.48, 22.71, 27.80, 23.42...
## $ MalePctDivorce        <dbl> 3.67, 4.23, 10.10, 10.98, 7.51, 11.40, 5...
## $ MalePctNevMarr        <dbl> 26.38, 27.99, 25.78, 28.15, 50.66, 33.32...
## $ FemalePctDiv          <dbl> 5.22, 6.45, 14.76, 14.47, 11.64, 14.46, ...
## $ TotalPctDiv           <dbl> 4.47, 5.42, 12.55, 12.91, 9.73, 13.04, 7...
## $ PersPerFam            <dbl> 3.22, 3.11, 2.95, 2.98, 2.98, 2.89, 3.14...
## $ PctFam2Par            <dbl> 91.43, 86.91, 78.54, 64.02, 58.59, 71.94...
## $ PctKids2Par           <dbl> 90.17, 85.33, 78.85, 62.36, 55.20, 69.79...
## $ PctYoungKids2Par      <dbl> 95.78, 96.82, 92.37, 65.38, 66.51, 79.76...
## $ PctTeen2Par           <dbl> 95.81, 86.46, 75.72, 67.43, 79.17, 75.33...
## $ PctWorkMomYoungKids   <dbl> 44.56, 51.14, 66.08, 59.59, 61.22, 62.96...
## $ PctWorkMom            <dbl> 58.88, 62.43, 74.19, 70.27, 68.94, 70.52...
## $ NumKidsBornNeverMar   <int> 31, 43, 164, 561, 402, 1511, 263, 2368, ...
## $ PctKidsBornNeverMar   <dbl> 0.36, 0.24, 0.88, 3.84, 4.70, 1.58, 1.18...
## $ NumImmig              <int> 1277, 1920, 1468, 339, 196, 2091, 2637, ...
## $ PctImmigRecent        <dbl> 8.69, 5.21, 16.42, 13.86, 46.94, 21.33, ...
## $ PctImmigRec5          <dbl> 13.00, 8.65, 23.98, 13.86, 56.12, 30.56,...
## $ PctImmigRec8          <dbl> 20.99, 13.33, 32.08, 15.34, 67.86, 38.02...
## $ PctImmigRec10         <dbl> 30.93, 22.50, 35.63, 15.34, 69.90, 45.48...
## $ PctRecentImmig        <dbl> 0.93, 0.43, 0.82, 0.28, 0.82, 0.32, 1.05...
## $ PctRecImmig5          <dbl> 1.39, 0.72, 1.20, 0.28, 0.98, 0.45, 1.49...
## $ PctRecImmig8          <dbl> 2.24, 1.11, 1.61, 0.31, 1.18, 0.57, 2.20...
## $ PctRecImmig10         <dbl> 3.30, 1.87, 1.78, 0.31, 1.22, 0.68, 2.55...
## $ PctSpeakEnglOnly      <dbl> 85.68, 87.79, 93.11, 94.98, 94.64, 96.87...
## $ PctNotSpeakEnglWell   <dbl> 1.37, 1.81, 1.14, 0.56, 0.39, 0.60, 0.60...
## $ PctLargHouseFam       <dbl> 4.81, 4.25, 2.97, 3.93, 5.23, 3.08, 5.08...
## $ PctLargHouseOccup     <dbl> 4.17, 3.34, 2.05, 2.56, 3.11, 1.92, 3.46...
## $ PersPerOccupHous      <dbl> 2.99, 2.70, 2.42, 2.37, 2.35, 2.28, 2.55...
## $ PersPerOwnOccHous     <dbl> 3.00, 2.83, 2.69, 2.51, 2.55, 2.37, 2.89...
## $ PersPerRentOccHous    <dbl> 2.84, 1.96, 2.06, 2.20, 2.12, 2.16, 2.09...
## $ PctPersOwnOccup       <dbl> 91.46, 89.03, 64.18, 58.18, 58.13, 57.81...
## $ PctPersDenseHous      <dbl> 0.39, 1.01, 2.03, 1.21, 2.94, 2.11, 1.47...
## $ PctHousLess3BR        <dbl> 11.06, 23.60, 47.46, 45.66, 55.64, 53.19...
## $ MedNumBR              <int> 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2, 3...
## $ HousVacant            <int> 64, 240, 544, 669, 333, 5119, 566, 2051,...
## $ PctHousOccup          <dbl> 98.37, 97.15, 95.68, 91.19, 92.45, 91.81...
## $ PctHousOwnOcc         <dbl> 91.01, 84.88, 57.79, 54.89, 53.57, 55.50...
## $ PctVacantBoarded      <dbl> 3.12, 0.00, 0.92, 2.54, 3.90, 2.09, 1.41...
## $ PctVacMore6Mos        <dbl> 37.50, 18.33, 7.54, 57.85, 42.64, 26.22,...
## $ MedYrHousBuilt        <int> 1959, 1958, 1976, 1939, 1958, 1966, 1956...
## $ PctHousNoPhone        <dbl> 0.00, 0.31, 1.55, 7.00, 7.45, 6.13, 0.69...
## $ PctWOFullPlumb        <dbl> 0.28, 0.14, 0.12, 0.87, 0.82, 0.31, 0.28...
## $ OwnOccLowQuart        <int> 215900, 136300, 74700, 36400, 30600, 377...
## $ OwnOccMedVal          <int> 262600, 164200, 90400, 49600, 43200, 539...
## $ OwnOccHiQuart         <int> 326900, 199900, 112000, 66500, 59500, 73...
## $ OwnOccQrange          <int> 111000, 63600, 37300, 30100, 28900, 3540...
## $ RentLowQ              <int> 685, 467, 370, 195, 202, 215, 463, 186, ...
## $ RentMedian            <int> 1001, 560, 428, 250, 283, 280, 669, 253,...
## $ RentHighQ             <int> 1001, 672, 520, 309, 362, 349, 824, 325,...
## $ RentQrange            <int> 316, 205, 150, 114, 160, 134, 361, 139, ...
## $ MedRent               <int> 1001, 627, 484, 333, 332, 340, 736, 338,...
## $ MedRentPctHousInc     <dbl> 23.8, 27.6, 24.1, 28.7, 32.2, 26.4, 24.4...
## $ MedOwnCostPctInc      <dbl> 21.1, 20.7, 21.7, 20.6, 23.2, 17.3, 20.8...
## $ MedOwnCostPctIncNoMtg <dbl> 14.0, 12.5, 11.6, 14.5, 12.9, 11.7, 12.5...
## $ NumInShelters         <int> 11, 0, 16, 0, 2, 327, 0, 21, 125, 43, 1,...
## $ NumStreet             <int> 0, 0, 0, 0, 0, 4, 0, 0, 15, 4, 0, 49, 2,...
## $ PctForeignBorn        <dbl> 10.66, 8.30, 5.00, 2.04, 1.74, 1.49, 9.1...
## $ PctBornSameState      <dbl> 53.72, 77.17, 44.77, 88.71, 73.75, 64.35...
## $ PctSameHouse85        <dbl> 65.29, 71.27, 36.60, 56.70, 42.22, 42.29...
## $ PctSameCity85         <dbl> 78.09, 90.22, 61.26, 90.17, 60.34, 70.61...
## $ PctSameState85        <dbl> 89.14, 96.12, 82.85, 96.24, 89.02, 85.66...
## $ LemasSwornFT          <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 198, ?, ?, ?,...
## $ LemasSwFTPerPop       <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 183.53, ?, ?,...
## $ LemasSwFTFieldOps     <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 187, ?, ?, ?,...
## $ LemasSwFTFieldPerPop  <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 173.33, ?, ?,...
## $ LemasTotalReq         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 73432, ?, ?, ...
## $ LemasTotReqPerPop     <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 68065.1, ?, ?...
## $ PolicReqPerOffic      <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 370.9, ?, ?, ...
## $ PolicPerPop           <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 183.5, ?, ?, ...
## $ RacialMatchCommPol    <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 89.32, ?, ?, ...
## $ PctPolicWhite         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 78.28, ?, ?, ...
## $ PctPolicBlack         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 11.11, ?, ?, ...
## $ PctPolicHisp          <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 10.61, ?, ?, ...
## $ PctPolicAsian         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?, 0...
## $ PctPolicMinor         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 21.72, ?, ?, ...
## $ OfficAssgnDrugUnits   <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 13, ?, ?, ?, ...
## $ NumKindsDrugsSeiz     <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 12, ?, ?, ?, ...
## $ PolicAveOTWorked      <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 60.2, ?, ?, ?...
## $ LandArea              <dbl> 6.5, 10.6, 10.6, 5.2, 11.5, 70.4, 10.9, ...
## $ PopDens               <dbl> 1845.9, 2186.7, 2780.9, 3217.7, 974.2, 1...
## $ PctUsePubTrans        <dbl> 9.63, 3.84, 4.37, 3.31, 0.38, 0.97, 9.62...
## $ PolicCars             <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 100, ?, ?, ?,...
## $ PolicOperBudg         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 9315474, ?, ?...
## $ LemasPctPolicOnPatr   <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 94.44, ?, ?, ...
## $ LemasGangUnitDeploy   <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 10, ?, ?, ?, ...
## $ LemasPctOfficDrugUn   <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00...
## $ PolicBudgPerPop       <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 86346.3, ?, ?...
## $ murders               <int> 0, 0, 3, 0, 0, 7, 0, 8, 0, 29, 1, 12, 3,...
## $ murdPerPop            <dbl> 0.00, 0.00, 8.30, 0.00, 0.00, 4.63, 0.00...
## $ rapes                 <fct> 0, 1, 6, 10, ?, 77, 4, 34, 35, 141, 29, ...
## $ rapesPerPop           <fct> 0, 4.25, 16.6, 57.86, ?, 50.98, 13.53, 5...
## $ robberies             <fct> 1, 5, 56, 10, 4, 136, 9, 98, 16, 453, 71...
## $ robbbPerPop           <fct> 8.2, 21.26, 154.95, 57.86, 32.04, 90.05,...
## $ assaults              <fct> 4, 24, 14, 33, 14, 449, 54, 128, 41, 104...
## $ assaultPerPop         <fct> 32.81, 102.05, 38.74, 190.93, 112.14, 29...
## $ burglaries            <fct> 14, 57, 274, 225, 91, 2094, 110, 608, 42...
## $ burglPerPop           <fct> 114.85, 242.37, 758.14, 1301.78, 728.93,...
## $ larcenies             <fct> 138, 376, 1797, 716, 1060, 7690, 288, 22...
## $ larcPerPop            <fct> 1132.08, 1598.78, 4972.19, 4142.56, 8490...
## $ autoTheft             <fct> 16, 26, 136, 47, 91, 454, 144, 125, 206,...
## $ autoTheftPerPop       <fct> 131.26, 110.55, 376.3, 271.93, 728.93, 3...
## $ arsons                <fct> 2, 1, 22, ?, 5, 134, 17, 9, 8, 18, 6, 20...
## $ arsonsPerPop          <fct> 16.41, 4.25, 60.87, ?, 40.05, 88.72, 57....
## $ ViolentCrimesPerPop   <fct> 41.02, 127.56, 218.59, 306.64, ?, 442.95...
## $ nonViolPerPop         <fct> 1394.59, 1955.95, 6167.51, ?, 9988.79, 6...

The dataset has 2215 rows/observations and 147 columns/variables.

Unique Identifier-Check for Duplicates and Missing Values

We perform a quick check for duplicates. We create the dataset ‘a’ to check if the City (communityname) names are unique. If number of observations in ‘a’ = number of observations in the crime data set then we have no duplicates.

# Distinct community names, stored as a one-column data frame for easier reading
a <- data.frame(a = unique(crime$communityname))
c(nrow(a), ncol(a))  # rows x columns of the de-duplicated names
## [1] 2018    1

‘a’ has 2018 unique observations. This is significantly lower than the 2215 observations in the crime dataset. Thus we have duplicates in the city names.

Let's investigate further by using ‘table’ to figure out which city names have been duplicated.

# Frequency of every community name (columns Var1 = name, Freq = count), listed alphabetically.
# NOTE(review): the variable name 'table' shadows base::table(); calls to table() still
# resolve to the function, but a different name would be clearer.
table <- data.frame(table(crime$communityname))
# Keep only the names whose count is not exactly one, i.e. the repeated names
duplicates <- subset(table, Freq != 1)
dim(duplicates)
## [1] 144   2

We can see that there are 144 city names that appear more than once.

Now we subset the crimedata by keeping only the duplicate city names to investigate further

# Rows whose community name is one of the repeated names, sorted alphabetically by name
crime_dups <- crime %>%
  filter(communityname %in% duplicates$Var1) %>%
  arrange(communityname)
dim(crime_dups)
## [1] 341 147

There are 341 observations with duplicated city names. We can see that the community name duplicates are due to the fact that multiple states can have the same city name. Hence community name is not a unique identifier.

We can create a unique identifier by considering both city names and State. This can be done by concatenating city name and state.

# Build a combined key "<communityname> <state>" (paste() joins with a single space)
crime[["citystate"]] <- with(crime, paste(communityname, state))
# Distinct values of the combined key; compare the count to the number of rows
b <- unique(crime[["citystate"]])
length(b)
## [1] 2215

There are 2215 unique values when considering both community name and state. Since this is also the number of observations in the crime data, there are no duplicates.


cleaning variable type

Let's look at the variable attributes by using the glimpse function.

dplyr::glimpse(crime)  # re-inspect column types now that 'citystate' has been added
## Observations: 2,215
## Variables: 148
## $ communityname         <fct> BerkeleyHeightstownship, Marpletownship,...
## $ state                 <fct> NJ, PA, OR, NY, MN, MO, MA, IN, ND, TX, ...
## $ countyCode            <fct> 39, 45, ?, 35, 7, ?, 21, ?, 17, ?, ?, ?,...
## $ communityCode         <fct> 5320, 47616, ?, 29443, 5068, ?, 50250, ?...
## $ fold                  <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ population            <int> 11980, 23123, 29344, 16656, 11245, 14049...
## $ householdsize         <dbl> 3.10, 2.82, 2.43, 2.40, 2.76, 2.45, 2.60...
## $ racepctblack          <dbl> 1.37, 0.80, 0.74, 1.70, 0.53, 2.51, 1.60...
## $ racePctWhite          <dbl> 91.78, 95.57, 94.33, 97.35, 89.16, 95.65...
## $ racePctAsian          <dbl> 6.50, 3.44, 3.43, 0.50, 1.17, 0.90, 1.47...
## $ racePctHisp           <dbl> 1.88, 0.85, 2.35, 0.70, 0.52, 0.95, 1.10...
## $ agePct12t21           <dbl> 12.47, 11.01, 11.36, 12.55, 24.46, 18.09...
## $ agePct12t29           <dbl> 21.44, 21.30, 25.88, 25.20, 40.53, 32.89...
## $ agePct16t24           <dbl> 10.93, 10.48, 11.01, 12.19, 28.69, 20.04...
## $ agePct65up            <dbl> 11.33, 17.18, 10.28, 17.57, 12.65, 13.26...
## $ numbUrban             <int> 11980, 23123, 29344, 0, 0, 140494, 28700...
## $ pctUrban              <dbl> 100.00, 100.00, 100.00, 0.00, 0.00, 100....
## $ medIncome             <int> 75122, 47917, 35669, 20580, 17390, 21577...
## $ pctWWage              <dbl> 89.24, 78.99, 82.00, 68.15, 69.33, 75.78...
## $ pctWFarmSelf          <dbl> 1.55, 1.11, 1.15, 0.24, 0.55, 1.00, 0.39...
## $ pctWInvInc            <dbl> 70.20, 64.11, 55.73, 38.95, 42.82, 41.15...
## $ pctWSocSec            <dbl> 23.62, 35.50, 22.25, 39.48, 32.16, 29.31...
## $ pctWPubAsst           <dbl> 1.03, 2.75, 2.94, 11.71, 11.21, 7.12, 5....
## $ pctWRetire            <dbl> 18.39, 22.85, 14.56, 18.33, 14.43, 14.09...
## $ medFamInc             <int> 79584, 55323, 42112, 26501, 24018, 27705...
## $ perCapInc             <int> 29711, 20148, 16946, 10810, 8483, 11878,...
## $ whitePerCap           <int> 30233, 20191, 17103, 10909, 9009, 12029,...
## $ blackPerCap           <int> 13600, 18137, 16644, 9984, 887, 7382, 17...
## $ indianPerCap          <int> 5725, 0, 21606, 4941, 4425, 10264, 21482...
## $ AsianPerCap           <int> 27101, 20074, 15528, 3541, 3352, 10753, ...
## $ OtherPerCap           <fct> 5115, 5250, 5954, 2451, 3000, 7192, 2185...
## $ HispPerCap            <int> 22838, 12222, 8405, 4391, 1328, 8104, 22...
## $ NumUnderPov           <int> 227, 885, 1389, 2831, 2855, 23223, 1126,...
## $ PctPopUnderPov        <dbl> 1.96, 3.98, 4.75, 17.23, 29.99, 17.78, 4...
## $ PctLess9thGrade       <dbl> 5.81, 5.61, 2.80, 11.05, 12.15, 8.76, 4....
## $ PctNotHSGrad          <dbl> 9.90, 13.72, 9.09, 33.68, 23.06, 23.03, ...
## $ PctBSorMore           <dbl> 48.18, 29.89, 30.13, 10.81, 25.28, 20.66...
## $ PctUnemployed         <dbl> 2.70, 2.43, 4.01, 9.86, 9.08, 5.72, 4.85...
## $ PctEmploy             <dbl> 64.55, 61.96, 69.80, 54.74, 52.44, 59.02...
## $ PctEmplManu           <dbl> 14.65, 12.26, 15.95, 31.22, 6.89, 14.31,...
## $ PctEmplProfServ       <dbl> 28.82, 29.28, 21.52, 27.43, 36.54, 26.83...
## $ PctOccupManu          <dbl> 5.49, 6.39, 8.79, 26.76, 10.94, 14.72, 8...
## $ PctOccupMgmtProf      <dbl> 50.73, 37.64, 32.48, 22.71, 27.80, 23.42...
## $ MalePctDivorce        <dbl> 3.67, 4.23, 10.10, 10.98, 7.51, 11.40, 5...
## $ MalePctNevMarr        <dbl> 26.38, 27.99, 25.78, 28.15, 50.66, 33.32...
## $ FemalePctDiv          <dbl> 5.22, 6.45, 14.76, 14.47, 11.64, 14.46, ...
## $ TotalPctDiv           <dbl> 4.47, 5.42, 12.55, 12.91, 9.73, 13.04, 7...
## $ PersPerFam            <dbl> 3.22, 3.11, 2.95, 2.98, 2.98, 2.89, 3.14...
## $ PctFam2Par            <dbl> 91.43, 86.91, 78.54, 64.02, 58.59, 71.94...
## $ PctKids2Par           <dbl> 90.17, 85.33, 78.85, 62.36, 55.20, 69.79...
## $ PctYoungKids2Par      <dbl> 95.78, 96.82, 92.37, 65.38, 66.51, 79.76...
## $ PctTeen2Par           <dbl> 95.81, 86.46, 75.72, 67.43, 79.17, 75.33...
## $ PctWorkMomYoungKids   <dbl> 44.56, 51.14, 66.08, 59.59, 61.22, 62.96...
## $ PctWorkMom            <dbl> 58.88, 62.43, 74.19, 70.27, 68.94, 70.52...
## $ NumKidsBornNeverMar   <int> 31, 43, 164, 561, 402, 1511, 263, 2368, ...
## $ PctKidsBornNeverMar   <dbl> 0.36, 0.24, 0.88, 3.84, 4.70, 1.58, 1.18...
## $ NumImmig              <int> 1277, 1920, 1468, 339, 196, 2091, 2637, ...
## $ PctImmigRecent        <dbl> 8.69, 5.21, 16.42, 13.86, 46.94, 21.33, ...
## $ PctImmigRec5          <dbl> 13.00, 8.65, 23.98, 13.86, 56.12, 30.56,...
## $ PctImmigRec8          <dbl> 20.99, 13.33, 32.08, 15.34, 67.86, 38.02...
## $ PctImmigRec10         <dbl> 30.93, 22.50, 35.63, 15.34, 69.90, 45.48...
## $ PctRecentImmig        <dbl> 0.93, 0.43, 0.82, 0.28, 0.82, 0.32, 1.05...
## $ PctRecImmig5          <dbl> 1.39, 0.72, 1.20, 0.28, 0.98, 0.45, 1.49...
## $ PctRecImmig8          <dbl> 2.24, 1.11, 1.61, 0.31, 1.18, 0.57, 2.20...
## $ PctRecImmig10         <dbl> 3.30, 1.87, 1.78, 0.31, 1.22, 0.68, 2.55...
## $ PctSpeakEnglOnly      <dbl> 85.68, 87.79, 93.11, 94.98, 94.64, 96.87...
## $ PctNotSpeakEnglWell   <dbl> 1.37, 1.81, 1.14, 0.56, 0.39, 0.60, 0.60...
## $ PctLargHouseFam       <dbl> 4.81, 4.25, 2.97, 3.93, 5.23, 3.08, 5.08...
## $ PctLargHouseOccup     <dbl> 4.17, 3.34, 2.05, 2.56, 3.11, 1.92, 3.46...
## $ PersPerOccupHous      <dbl> 2.99, 2.70, 2.42, 2.37, 2.35, 2.28, 2.55...
## $ PersPerOwnOccHous     <dbl> 3.00, 2.83, 2.69, 2.51, 2.55, 2.37, 2.89...
## $ PersPerRentOccHous    <dbl> 2.84, 1.96, 2.06, 2.20, 2.12, 2.16, 2.09...
## $ PctPersOwnOccup       <dbl> 91.46, 89.03, 64.18, 58.18, 58.13, 57.81...
## $ PctPersDenseHous      <dbl> 0.39, 1.01, 2.03, 1.21, 2.94, 2.11, 1.47...
## $ PctHousLess3BR        <dbl> 11.06, 23.60, 47.46, 45.66, 55.64, 53.19...
## $ MedNumBR              <int> 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2, 3...
## $ HousVacant            <int> 64, 240, 544, 669, 333, 5119, 566, 2051,...
## $ PctHousOccup          <dbl> 98.37, 97.15, 95.68, 91.19, 92.45, 91.81...
## $ PctHousOwnOcc         <dbl> 91.01, 84.88, 57.79, 54.89, 53.57, 55.50...
## $ PctVacantBoarded      <dbl> 3.12, 0.00, 0.92, 2.54, 3.90, 2.09, 1.41...
## $ PctVacMore6Mos        <dbl> 37.50, 18.33, 7.54, 57.85, 42.64, 26.22,...
## $ MedYrHousBuilt        <int> 1959, 1958, 1976, 1939, 1958, 1966, 1956...
## $ PctHousNoPhone        <dbl> 0.00, 0.31, 1.55, 7.00, 7.45, 6.13, 0.69...
## $ PctWOFullPlumb        <dbl> 0.28, 0.14, 0.12, 0.87, 0.82, 0.31, 0.28...
## $ OwnOccLowQuart        <int> 215900, 136300, 74700, 36400, 30600, 377...
## $ OwnOccMedVal          <int> 262600, 164200, 90400, 49600, 43200, 539...
## $ OwnOccHiQuart         <int> 326900, 199900, 112000, 66500, 59500, 73...
## $ OwnOccQrange          <int> 111000, 63600, 37300, 30100, 28900, 3540...
## $ RentLowQ              <int> 685, 467, 370, 195, 202, 215, 463, 186, ...
## $ RentMedian            <int> 1001, 560, 428, 250, 283, 280, 669, 253,...
## $ RentHighQ             <int> 1001, 672, 520, 309, 362, 349, 824, 325,...
## $ RentQrange            <int> 316, 205, 150, 114, 160, 134, 361, 139, ...
## $ MedRent               <int> 1001, 627, 484, 333, 332, 340, 736, 338,...
## $ MedRentPctHousInc     <dbl> 23.8, 27.6, 24.1, 28.7, 32.2, 26.4, 24.4...
## $ MedOwnCostPctInc      <dbl> 21.1, 20.7, 21.7, 20.6, 23.2, 17.3, 20.8...
## $ MedOwnCostPctIncNoMtg <dbl> 14.0, 12.5, 11.6, 14.5, 12.9, 11.7, 12.5...
## $ NumInShelters         <int> 11, 0, 16, 0, 2, 327, 0, 21, 125, 43, 1,...
## $ NumStreet             <int> 0, 0, 0, 0, 0, 4, 0, 0, 15, 4, 0, 49, 2,...
## $ PctForeignBorn        <dbl> 10.66, 8.30, 5.00, 2.04, 1.74, 1.49, 9.1...
## $ PctBornSameState      <dbl> 53.72, 77.17, 44.77, 88.71, 73.75, 64.35...
## $ PctSameHouse85        <dbl> 65.29, 71.27, 36.60, 56.70, 42.22, 42.29...
## $ PctSameCity85         <dbl> 78.09, 90.22, 61.26, 90.17, 60.34, 70.61...
## $ PctSameState85        <dbl> 89.14, 96.12, 82.85, 96.24, 89.02, 85.66...
## $ LemasSwornFT          <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 198, ?, ?, ?,...
## $ LemasSwFTPerPop       <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 183.53, ?, ?,...
## $ LemasSwFTFieldOps     <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 187, ?, ?, ?,...
## $ LemasSwFTFieldPerPop  <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 173.33, ?, ?,...
## $ LemasTotalReq         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 73432, ?, ?, ...
## $ LemasTotReqPerPop     <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 68065.1, ?, ?...
## $ PolicReqPerOffic      <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 370.9, ?, ?, ...
## $ PolicPerPop           <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 183.5, ?, ?, ...
## $ RacialMatchCommPol    <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 89.32, ?, ?, ...
## $ PctPolicWhite         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 78.28, ?, ?, ...
## $ PctPolicBlack         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 11.11, ?, ?, ...
## $ PctPolicHisp          <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 10.61, ?, ?, ...
## $ PctPolicAsian         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?, 0...
## $ PctPolicMinor         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 21.72, ?, ?, ...
## $ OfficAssgnDrugUnits   <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 13, ?, ?, ?, ...
## $ NumKindsDrugsSeiz     <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 12, ?, ?, ?, ...
## $ PolicAveOTWorked      <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 60.2, ?, ?, ?...
## $ LandArea              <dbl> 6.5, 10.6, 10.6, 5.2, 11.5, 70.4, 10.9, ...
## $ PopDens               <dbl> 1845.9, 2186.7, 2780.9, 3217.7, 974.2, 1...
## $ PctUsePubTrans        <dbl> 9.63, 3.84, 4.37, 3.31, 0.38, 0.97, 9.62...
## $ PolicCars             <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 100, ?, ?, ?,...
## $ PolicOperBudg         <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 9315474, ?, ?...
## $ LemasPctPolicOnPatr   <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 94.44, ?, ?, ...
## $ LemasGangUnitDeploy   <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 10, ?, ?, ?, ...
## $ LemasPctOfficDrugUn   <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00...
## $ PolicBudgPerPop       <fct> ?, ?, ?, ?, ?, ?, ?, ?, ?, 86346.3, ?, ?...
## $ murders               <int> 0, 0, 3, 0, 0, 7, 0, 8, 0, 29, 1, 12, 3,...
## $ murdPerPop            <dbl> 0.00, 0.00, 8.30, 0.00, 0.00, 4.63, 0.00...
## $ rapes                 <fct> 0, 1, 6, 10, ?, 77, 4, 34, 35, 141, 29, ...
## $ rapesPerPop           <fct> 0, 4.25, 16.6, 57.86, ?, 50.98, 13.53, 5...
## $ robberies             <fct> 1, 5, 56, 10, 4, 136, 9, 98, 16, 453, 71...
## $ robbbPerPop           <fct> 8.2, 21.26, 154.95, 57.86, 32.04, 90.05,...
## $ assaults              <fct> 4, 24, 14, 33, 14, 449, 54, 128, 41, 104...
## $ assaultPerPop         <fct> 32.81, 102.05, 38.74, 190.93, 112.14, 29...
## $ burglaries            <fct> 14, 57, 274, 225, 91, 2094, 110, 608, 42...
## $ burglPerPop           <fct> 114.85, 242.37, 758.14, 1301.78, 728.93,...
## $ larcenies             <fct> 138, 376, 1797, 716, 1060, 7690, 288, 22...
## $ larcPerPop            <fct> 1132.08, 1598.78, 4972.19, 4142.56, 8490...
## $ autoTheft             <fct> 16, 26, 136, 47, 91, 454, 144, 125, 206,...
## $ autoTheftPerPop       <fct> 131.26, 110.55, 376.3, 271.93, 728.93, 3...
## $ arsons                <fct> 2, 1, 22, ?, 5, 134, 17, 9, 8, 18, 6, 20...
## $ arsonsPerPop          <fct> 16.41, 4.25, 60.87, ?, 40.05, 88.72, 57....
## $ ViolentCrimesPerPop   <fct> 41.02, 127.56, 218.59, 306.64, ?, 442.95...
## $ nonViolPerPop         <fct> 1394.59, 1955.95, 6167.51, ?, 9988.79, 6...
## $ citystate             <chr> "BerkeleyHeightstownship NJ", "Marpletow...

Many of the variables have been classified as factors or integers. The factor columns hold numbers that were read as text because missing values are coded as “?”. We need to reclassify these variables as numeric.

We redefine the variable “fold” as a factor and the variables from column 6 onwards as numeric.

crime$fold <- as.factor(crime$fold)  # 'fold' is a group label, so store it as a factor

# Measurement columns start at position 6. The text key 'citystate' (appended as the
# last column earlier) must be excluded: as.numeric() would turn every key into NA.
num_cols <- setdiff(names(crime)[-seq_len(5)], "citystate")

# Convert each measurement column to numeric, keeping the result a list of columns
# (lapply) rather than a matrix (sapply).
crime[num_cols] <- lapply(crime[num_cols], function(col) {
  if (is.numeric(col)) {
    return(as.numeric(col))  # integer/double columns need no detour through text
  }
  # Factors must go through character first: as.numeric() on a factor returns the
  # internal level codes, not the printed values.
  values <- as.character(col)
  values[values %in% "?"] <- NA  # "?" is the file's missing-value marker
  as.numeric(values)             # any other non-numeric text still raises a warning
})

dplyr::glimpse(crime)  # confirm the column types after the conversion
## Observations: 2,215
## Variables: 148
## $ communityname         <fct> BerkeleyHeightstownship, Marpletownship,...
## $ state                 <fct> NJ, PA, OR, NY, MN, MO, MA, IN, ND, TX, ...
## $ countyCode            <fct> 39, 45, ?, 35, 7, ?, 21, ?, 17, ?, ?, ?,...
## $ communityCode         <fct> 5320, 47616, ?, 29443, 5068, ?, 50250, ?...
## $ fold                  <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ population            <dbl> 11980, 23123, 29344, 16656, 11245, 14049...
## $ householdsize         <dbl> 3.10, 2.82, 2.43, 2.40, 2.76, 2.45, 2.60...
## $ racepctblack          <dbl> 1.37, 0.80, 0.74, 1.70, 0.53, 2.51, 1.60...
## $ racePctWhite          <dbl> 91.78, 95.57, 94.33, 97.35, 89.16, 95.65...
## $ racePctAsian          <dbl> 6.50, 3.44, 3.43, 0.50, 1.17, 0.90, 1.47...
## $ racePctHisp           <dbl> 1.88, 0.85, 2.35, 0.70, 0.52, 0.95, 1.10...
## $ agePct12t21           <dbl> 12.47, 11.01, 11.36, 12.55, 24.46, 18.09...
## $ agePct12t29           <dbl> 21.44, 21.30, 25.88, 25.20, 40.53, 32.89...
## $ agePct16t24           <dbl> 10.93, 10.48, 11.01, 12.19, 28.69, 20.04...
## $ agePct65up            <dbl> 11.33, 17.18, 10.28, 17.57, 12.65, 13.26...
## $ numbUrban             <dbl> 11980, 23123, 29344, 0, 0, 140494, 28700...
## $ pctUrban              <dbl> 100.00, 100.00, 100.00, 0.00, 0.00, 100....
## $ medIncome             <dbl> 75122, 47917, 35669, 20580, 17390, 21577...
## $ pctWWage              <dbl> 89.24, 78.99, 82.00, 68.15, 69.33, 75.78...
## $ pctWFarmSelf          <dbl> 1.55, 1.11, 1.15, 0.24, 0.55, 1.00, 0.39...
## $ pctWInvInc            <dbl> 70.20, 64.11, 55.73, 38.95, 42.82, 41.15...
## $ pctWSocSec            <dbl> 23.62, 35.50, 22.25, 39.48, 32.16, 29.31...
## $ pctWPubAsst           <dbl> 1.03, 2.75, 2.94, 11.71, 11.21, 7.12, 5....
## $ pctWRetire            <dbl> 18.39, 22.85, 14.56, 18.33, 14.43, 14.09...
## $ medFamInc             <dbl> 79584, 55323, 42112, 26501, 24018, 27705...
## $ perCapInc             <dbl> 29711, 20148, 16946, 10810, 8483, 11878,...
## $ whitePerCap           <dbl> 30233, 20191, 17103, 10909, 9009, 12029,...
## $ blackPerCap           <dbl> 13600, 18137, 16644, 9984, 887, 7382, 17...
## $ indianPerCap          <dbl> 5725, 0, 21606, 4941, 4425, 10264, 21482...
## $ AsianPerCap           <dbl> 27101, 20074, 15528, 3541, 3352, 10753, ...
## $ OtherPerCap           <dbl> 5115, 5250, 5954, 2451, 3000, 7192, 2185...
## $ HispPerCap            <dbl> 22838, 12222, 8405, 4391, 1328, 8104, 22...
## $ NumUnderPov           <dbl> 227, 885, 1389, 2831, 2855, 23223, 1126,...
## $ PctPopUnderPov        <dbl> 1.96, 3.98, 4.75, 17.23, 29.99, 17.78, 4...
## $ PctLess9thGrade       <dbl> 5.81, 5.61, 2.80, 11.05, 12.15, 8.76, 4....
## $ PctNotHSGrad          <dbl> 9.90, 13.72, 9.09, 33.68, 23.06, 23.03, ...
## $ PctBSorMore           <dbl> 48.18, 29.89, 30.13, 10.81, 25.28, 20.66...
## $ PctUnemployed         <dbl> 2.70, 2.43, 4.01, 9.86, 9.08, 5.72, 4.85...
## $ PctEmploy             <dbl> 64.55, 61.96, 69.80, 54.74, 52.44, 59.02...
## $ PctEmplManu           <dbl> 14.65, 12.26, 15.95, 31.22, 6.89, 14.31,...
## $ PctEmplProfServ       <dbl> 28.82, 29.28, 21.52, 27.43, 36.54, 26.83...
## $ PctOccupManu          <dbl> 5.49, 6.39, 8.79, 26.76, 10.94, 14.72, 8...
## $ PctOccupMgmtProf      <dbl> 50.73, 37.64, 32.48, 22.71, 27.80, 23.42...
## $ MalePctDivorce        <dbl> 3.67, 4.23, 10.10, 10.98, 7.51, 11.40, 5...
## $ MalePctNevMarr        <dbl> 26.38, 27.99, 25.78, 28.15, 50.66, 33.32...
## $ FemalePctDiv          <dbl> 5.22, 6.45, 14.76, 14.47, 11.64, 14.46, ...
## $ TotalPctDiv           <dbl> 4.47, 5.42, 12.55, 12.91, 9.73, 13.04, 7...
## $ PersPerFam            <dbl> 3.22, 3.11, 2.95, 2.98, 2.98, 2.89, 3.14...
## $ PctFam2Par            <dbl> 91.43, 86.91, 78.54, 64.02, 58.59, 71.94...
## $ PctKids2Par           <dbl> 90.17, 85.33, 78.85, 62.36, 55.20, 69.79...
## $ PctYoungKids2Par      <dbl> 95.78, 96.82, 92.37, 65.38, 66.51, 79.76...
## $ PctTeen2Par           <dbl> 95.81, 86.46, 75.72, 67.43, 79.17, 75.33...
## $ PctWorkMomYoungKids   <dbl> 44.56, 51.14, 66.08, 59.59, 61.22, 62.96...
## $ PctWorkMom            <dbl> 58.88, 62.43, 74.19, 70.27, 68.94, 70.52...
## $ NumKidsBornNeverMar   <dbl> 31, 43, 164, 561, 402, 1511, 263, 2368, ...
## $ PctKidsBornNeverMar   <dbl> 0.36, 0.24, 0.88, 3.84, 4.70, 1.58, 1.18...
## $ NumImmig              <dbl> 1277, 1920, 1468, 339, 196, 2091, 2637, ...
## $ PctImmigRecent        <dbl> 8.69, 5.21, 16.42, 13.86, 46.94, 21.33, ...
## $ PctImmigRec5          <dbl> 13.00, 8.65, 23.98, 13.86, 56.12, 30.56,...
## $ PctImmigRec8          <dbl> 20.99, 13.33, 32.08, 15.34, 67.86, 38.02...
## $ PctImmigRec10         <dbl> 30.93, 22.50, 35.63, 15.34, 69.90, 45.48...
## $ PctRecentImmig        <dbl> 0.93, 0.43, 0.82, 0.28, 0.82, 0.32, 1.05...
## $ PctRecImmig5          <dbl> 1.39, 0.72, 1.20, 0.28, 0.98, 0.45, 1.49...
## $ PctRecImmig8          <dbl> 2.24, 1.11, 1.61, 0.31, 1.18, 0.57, 2.20...
## $ PctRecImmig10         <dbl> 3.30, 1.87, 1.78, 0.31, 1.22, 0.68, 2.55...
## $ PctSpeakEnglOnly      <dbl> 85.68, 87.79, 93.11, 94.98, 94.64, 96.87...
## $ PctNotSpeakEnglWell   <dbl> 1.37, 1.81, 1.14, 0.56, 0.39, 0.60, 0.60...
## $ PctLargHouseFam       <dbl> 4.81, 4.25, 2.97, 3.93, 5.23, 3.08, 5.08...
## $ PctLargHouseOccup     <dbl> 4.17, 3.34, 2.05, 2.56, 3.11, 1.92, 3.46...
## $ PersPerOccupHous      <dbl> 2.99, 2.70, 2.42, 2.37, 2.35, 2.28, 2.55...
## $ PersPerOwnOccHous     <dbl> 3.00, 2.83, 2.69, 2.51, 2.55, 2.37, 2.89...
## $ PersPerRentOccHous    <dbl> 2.84, 1.96, 2.06, 2.20, 2.12, 2.16, 2.09...
## $ PctPersOwnOccup       <dbl> 91.46, 89.03, 64.18, 58.18, 58.13, 57.81...
## $ PctPersDenseHous      <dbl> 0.39, 1.01, 2.03, 1.21, 2.94, 2.11, 1.47...
## $ PctHousLess3BR        <dbl> 11.06, 23.60, 47.46, 45.66, 55.64, 53.19...
## $ MedNumBR              <dbl> 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2, 3...
## $ HousVacant            <dbl> 64, 240, 544, 669, 333, 5119, 566, 2051,...
## $ PctHousOccup          <dbl> 98.37, 97.15, 95.68, 91.19, 92.45, 91.81...
## $ PctHousOwnOcc         <dbl> 91.01, 84.88, 57.79, 54.89, 53.57, 55.50...
## $ PctVacantBoarded      <dbl> 3.12, 0.00, 0.92, 2.54, 3.90, 2.09, 1.41...
## $ PctVacMore6Mos        <dbl> 37.50, 18.33, 7.54, 57.85, 42.64, 26.22,...
## $ MedYrHousBuilt        <dbl> 1959, 1958, 1976, 1939, 1958, 1966, 1956...
## $ PctHousNoPhone        <dbl> 0.00, 0.31, 1.55, 7.00, 7.45, 6.13, 0.69...
## $ PctWOFullPlumb        <dbl> 0.28, 0.14, 0.12, 0.87, 0.82, 0.31, 0.28...
## $ OwnOccLowQuart        <dbl> 215900, 136300, 74700, 36400, 30600, 377...
## $ OwnOccMedVal          <dbl> 262600, 164200, 90400, 49600, 43200, 539...
## $ OwnOccHiQuart         <dbl> 326900, 199900, 112000, 66500, 59500, 73...
## $ OwnOccQrange          <dbl> 111000, 63600, 37300, 30100, 28900, 3540...
## $ RentLowQ              <dbl> 685, 467, 370, 195, 202, 215, 463, 186, ...
## $ RentMedian            <dbl> 1001, 560, 428, 250, 283, 280, 669, 253,...
## $ RentHighQ             <dbl> 1001, 672, 520, 309, 362, 349, 824, 325,...
## $ RentQrange            <dbl> 316, 205, 150, 114, 160, 134, 361, 139, ...
## $ MedRent               <dbl> 1001, 627, 484, 333, 332, 340, 736, 338,...
## $ MedRentPctHousInc     <dbl> 23.8, 27.6, 24.1, 28.7, 32.2, 26.4, 24.4...
## $ MedOwnCostPctInc      <dbl> 21.1, 20.7, 21.7, 20.6, 23.2, 17.3, 20.8...
## $ MedOwnCostPctIncNoMtg <dbl> 14.0, 12.5, 11.6, 14.5, 12.9, 11.7, 12.5...
## $ NumInShelters         <dbl> 11, 0, 16, 0, 2, 327, 0, 21, 125, 43, 1,...
## $ NumStreet             <dbl> 0, 0, 0, 0, 0, 4, 0, 0, 15, 4, 0, 49, 2,...
## $ PctForeignBorn        <dbl> 10.66, 8.30, 5.00, 2.04, 1.74, 1.49, 9.1...
## $ PctBornSameState      <dbl> 53.72, 77.17, 44.77, 88.71, 73.75, 64.35...
## $ PctSameHouse85        <dbl> 65.29, 71.27, 36.60, 56.70, 42.22, 42.29...
## $ PctSameCity85         <dbl> 78.09, 90.22, 61.26, 90.17, 60.34, 70.61...
## $ PctSameState85        <dbl> 89.14, 96.12, 82.85, 96.24, 89.02, 85.66...
## $ LemasSwornFT          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 198,...
## $ LemasSwFTPerPop       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 183....
## $ LemasSwFTFieldOps     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 187,...
## $ LemasSwFTFieldPerPop  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 173....
## $ LemasTotalReq         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 7343...
## $ LemasTotReqPerPop     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 6806...
## $ PolicReqPerOffic      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 370....
## $ PolicPerPop           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 183....
## $ RacialMatchCommPol    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 89.3...
## $ PctPolicWhite         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 78.2...
## $ PctPolicBlack         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 11.1...
## $ PctPolicHisp          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 10.6...
## $ PctPolicAsian         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.00...
## $ PctPolicMinor         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 21.7...
## $ OfficAssgnDrugUnits   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 13, ...
## $ NumKindsDrugsSeiz     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 12, ...
## $ PolicAveOTWorked      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 60.2...
## $ LandArea              <dbl> 6.5, 10.6, 10.6, 5.2, 11.5, 70.4, 10.9, ...
## $ PopDens               <dbl> 1845.9, 2186.7, 2780.9, 3217.7, 974.2, 1...
## $ PctUsePubTrans        <dbl> 9.63, 3.84, 4.37, 3.31, 0.38, 0.97, 9.62...
## $ PolicCars             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 100,...
## $ PolicOperBudg         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 9315...
## $ LemasPctPolicOnPatr   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 94.4...
## $ LemasGangUnitDeploy   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 10, ...
## $ LemasPctOfficDrugUn   <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00...
## $ PolicBudgPerPop       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 8634...
## $ murders               <dbl> 0, 0, 3, 0, 0, 7, 0, 8, 0, 29, 1, 12, 3,...
## $ murdPerPop            <dbl> 0.00, 0.00, 8.30, 0.00, 0.00, 4.63, 0.00...
## $ rapes                 <dbl> 0, 1, 6, 10, NA, 77, 4, 34, 35, 141, 29,...
## $ rapesPerPop           <dbl> 0.00, 4.25, 16.60, 57.86, NA, 50.98, 13....
## $ robberies             <dbl> 1, 5, 56, 10, 4, 136, 9, 98, 16, 453, 71...
## $ robbbPerPop           <dbl> 8.20, 21.26, 154.95, 57.86, 32.04, 90.05...
## $ assaults              <dbl> 4, 24, 14, 33, 14, 449, 54, 128, 41, 104...
## $ assaultPerPop         <dbl> 32.81, 102.05, 38.74, 190.93, 112.14, 29...
## $ burglaries            <dbl> 14, 57, 274, 225, 91, 2094, 110, 608, 42...
## $ burglPerPop           <dbl> 114.85, 242.37, 758.14, 1301.78, 728.93,...
## $ larcenies             <dbl> 138, 376, 1797, 716, 1060, 7690, 288, 22...
## $ larcPerPop            <dbl> 1132.08, 1598.78, 4972.19, 4142.56, 8490...
## $ autoTheft             <dbl> 16, 26, 136, 47, 91, 454, 144, 125, 206,...
## $ autoTheftPerPop       <dbl> 131.26, 110.55, 376.30, 271.93, 728.93, ...
## $ arsons                <dbl> 2, 1, 22, NA, 5, 134, 17, 9, 8, 18, 6, 2...
## $ arsonsPerPop          <dbl> 16.41, 4.25, 60.87, NA, 40.05, 88.72, 57...
## $ ViolentCrimesPerPop   <dbl> 41.02, 127.56, 218.59, 306.64, NA, 442.9...
## $ nonViolPerPop         <dbl> 1394.59, 1955.95, 6167.51, NA, 9988.79, ...
## $ citystate             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...

The first five columns are now classified as factors and the rest as numeric.


Missing data

Let's find the number of missing values in each column.

# Count the missing values (NA) in every column of `crime`
na_sums <- colSums(is.na(crime))
# Show the counts as a one-column data frame, one row per variable
data.frame(na_sums)
##                       na_sums
## communityname               0
## state                       0
## countyCode                  0
## communityCode               0
## fold                        0
## population                  0
## householdsize               0
## racepctblack                0
## racePctWhite                0
## racePctAsian                0
## racePctHisp                 0
## agePct12t21                 0
## agePct12t29                 0
## agePct16t24                 0
## agePct65up                  0
## numbUrban                   0
## pctUrban                    0
## medIncome                   0
## pctWWage                    0
## pctWFarmSelf                0
## pctWInvInc                  0
## pctWSocSec                  0
## pctWPubAsst                 0
## pctWRetire                  0
## medFamInc                   0
## perCapInc                   0
## whitePerCap                 0
## blackPerCap                 0
## indianPerCap                0
## AsianPerCap                 0
## OtherPerCap                 1
## HispPerCap                  0
## NumUnderPov                 0
## PctPopUnderPov              0
## PctLess9thGrade             0
## PctNotHSGrad                0
## PctBSorMore                 0
## PctUnemployed               0
## PctEmploy                   0
## PctEmplManu                 0
## PctEmplProfServ             0
## PctOccupManu                0
## PctOccupMgmtProf            0
## MalePctDivorce              0
## MalePctNevMarr              0
## FemalePctDiv                0
## TotalPctDiv                 0
## PersPerFam                  0
## PctFam2Par                  0
## PctKids2Par                 0
## PctYoungKids2Par            0
## PctTeen2Par                 0
## PctWorkMomYoungKids         0
## PctWorkMom                  0
## NumKidsBornNeverMar         0
## PctKidsBornNeverMar         0
## NumImmig                    0
## PctImmigRecent              0
## PctImmigRec5                0
## PctImmigRec8                0
## PctImmigRec10               0
## PctRecentImmig              0
## PctRecImmig5                0
## PctRecImmig8                0
## PctRecImmig10               0
## PctSpeakEnglOnly            0
## PctNotSpeakEnglWell         0
## PctLargHouseFam             0
## PctLargHouseOccup           0
## PersPerOccupHous            0
## PersPerOwnOccHous           0
## PersPerRentOccHous          0
## PctPersOwnOccup             0
## PctPersDenseHous            0
## PctHousLess3BR              0
## MedNumBR                    0
## HousVacant                  0
## PctHousOccup                0
## PctHousOwnOcc               0
## PctVacantBoarded            0
## PctVacMore6Mos              0
## MedYrHousBuilt              0
## PctHousNoPhone              0
## PctWOFullPlumb              0
## OwnOccLowQuart              0
## OwnOccMedVal                0
## OwnOccHiQuart               0
## OwnOccQrange                0
## RentLowQ                    0
## RentMedian                  0
## RentHighQ                   0
## RentQrange                  0
## MedRent                     0
## MedRentPctHousInc           0
## MedOwnCostPctInc            0
## MedOwnCostPctIncNoMtg       0
## NumInShelters               0
## NumStreet                   0
## PctForeignBorn              0
## PctBornSameState            0
## PctSameHouse85              0
## PctSameCity85               0
## PctSameState85              0
## LemasSwornFT             1872
## LemasSwFTPerPop          1872
## LemasSwFTFieldOps        1872
## LemasSwFTFieldPerPop     1872
## LemasTotalReq            1872
## LemasTotReqPerPop        1872
## PolicReqPerOffic         1872
## PolicPerPop              1872
## RacialMatchCommPol       1872
## PctPolicWhite            1872
## PctPolicBlack            1872
## PctPolicHisp             1872
## PctPolicAsian            1872
## PctPolicMinor            1872
## OfficAssgnDrugUnits      1872
## NumKindsDrugsSeiz        1872
## PolicAveOTWorked         1872
## LandArea                    0
## PopDens                     0
## PctUsePubTrans              0
## PolicCars                1872
## PolicOperBudg            1872
## LemasPctPolicOnPatr      1872
## LemasGangUnitDeploy      1872
## LemasPctOfficDrugUn         0
## PolicBudgPerPop          1872
## murders                     0
## murdPerPop                  0
## rapes                     208
## rapesPerPop               208
## robberies                   1
## robbbPerPop                 1
## assaults                   13
## assaultPerPop              13
## burglaries                  3
## burglPerPop                 3
## larcenies                   3
## larcPerPop                  3
## autoTheft                   3
## autoTheftPerPop             3
## arsons                     91
## arsonsPerPop               91
## ViolentCrimesPerPop       221
## nonViolPerPop              97
## citystate                2215

There are variables (e.g. ‘PctPolicAsian’) with 1872 missing values, which is more than half of the 2215 observations. We shall drop every variable whose values are missing in roughly half or more of the observations (a cutoff of about 1103 missing values).

To drop these columns, I will keep only the columns whose number of missing values is below the cutoff of roughly half the rows (about 1103).

# Keep only the columns in which fewer than half of the observations are
# missing. The cutoff is derived from the number of rows instead of a
# hard-coded count, so it stays correct if the size of the data changes.
crime_new <- crime %>%
  select_if(~ sum(is.na(.)) < nrow(crime) / 2)

# Overview of the columns that remain after the drop
glimpse(crime_new)
## Observations: 2,215
## Variables: 125
## $ communityname         <fct> BerkeleyHeightstownship, Marpletownship,...
## $ state                 <fct> NJ, PA, OR, NY, MN, MO, MA, IN, ND, TX, ...
## $ countyCode            <fct> 39, 45, ?, 35, 7, ?, 21, ?, 17, ?, ?, ?,...
## $ communityCode         <fct> 5320, 47616, ?, 29443, 5068, ?, 50250, ?...
## $ fold                  <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ population            <dbl> 11980, 23123, 29344, 16656, 11245, 14049...
## $ householdsize         <dbl> 3.10, 2.82, 2.43, 2.40, 2.76, 2.45, 2.60...
## $ racepctblack          <dbl> 1.37, 0.80, 0.74, 1.70, 0.53, 2.51, 1.60...
## $ racePctWhite          <dbl> 91.78, 95.57, 94.33, 97.35, 89.16, 95.65...
## $ racePctAsian          <dbl> 6.50, 3.44, 3.43, 0.50, 1.17, 0.90, 1.47...
## $ racePctHisp           <dbl> 1.88, 0.85, 2.35, 0.70, 0.52, 0.95, 1.10...
## $ agePct12t21           <dbl> 12.47, 11.01, 11.36, 12.55, 24.46, 18.09...
## $ agePct12t29           <dbl> 21.44, 21.30, 25.88, 25.20, 40.53, 32.89...
## $ agePct16t24           <dbl> 10.93, 10.48, 11.01, 12.19, 28.69, 20.04...
## $ agePct65up            <dbl> 11.33, 17.18, 10.28, 17.57, 12.65, 13.26...
## $ numbUrban             <dbl> 11980, 23123, 29344, 0, 0, 140494, 28700...
## $ pctUrban              <dbl> 100.00, 100.00, 100.00, 0.00, 0.00, 100....
## $ medIncome             <dbl> 75122, 47917, 35669, 20580, 17390, 21577...
## $ pctWWage              <dbl> 89.24, 78.99, 82.00, 68.15, 69.33, 75.78...
## $ pctWFarmSelf          <dbl> 1.55, 1.11, 1.15, 0.24, 0.55, 1.00, 0.39...
## $ pctWInvInc            <dbl> 70.20, 64.11, 55.73, 38.95, 42.82, 41.15...
## $ pctWSocSec            <dbl> 23.62, 35.50, 22.25, 39.48, 32.16, 29.31...
## $ pctWPubAsst           <dbl> 1.03, 2.75, 2.94, 11.71, 11.21, 7.12, 5....
## $ pctWRetire            <dbl> 18.39, 22.85, 14.56, 18.33, 14.43, 14.09...
## $ medFamInc             <dbl> 79584, 55323, 42112, 26501, 24018, 27705...
## $ perCapInc             <dbl> 29711, 20148, 16946, 10810, 8483, 11878,...
## $ whitePerCap           <dbl> 30233, 20191, 17103, 10909, 9009, 12029,...
## $ blackPerCap           <dbl> 13600, 18137, 16644, 9984, 887, 7382, 17...
## $ indianPerCap          <dbl> 5725, 0, 21606, 4941, 4425, 10264, 21482...
## $ AsianPerCap           <dbl> 27101, 20074, 15528, 3541, 3352, 10753, ...
## $ OtherPerCap           <dbl> 5115, 5250, 5954, 2451, 3000, 7192, 2185...
## $ HispPerCap            <dbl> 22838, 12222, 8405, 4391, 1328, 8104, 22...
## $ NumUnderPov           <dbl> 227, 885, 1389, 2831, 2855, 23223, 1126,...
## $ PctPopUnderPov        <dbl> 1.96, 3.98, 4.75, 17.23, 29.99, 17.78, 4...
## $ PctLess9thGrade       <dbl> 5.81, 5.61, 2.80, 11.05, 12.15, 8.76, 4....
## $ PctNotHSGrad          <dbl> 9.90, 13.72, 9.09, 33.68, 23.06, 23.03, ...
## $ PctBSorMore           <dbl> 48.18, 29.89, 30.13, 10.81, 25.28, 20.66...
## $ PctUnemployed         <dbl> 2.70, 2.43, 4.01, 9.86, 9.08, 5.72, 4.85...
## $ PctEmploy             <dbl> 64.55, 61.96, 69.80, 54.74, 52.44, 59.02...
## $ PctEmplManu           <dbl> 14.65, 12.26, 15.95, 31.22, 6.89, 14.31,...
## $ PctEmplProfServ       <dbl> 28.82, 29.28, 21.52, 27.43, 36.54, 26.83...
## $ PctOccupManu          <dbl> 5.49, 6.39, 8.79, 26.76, 10.94, 14.72, 8...
## $ PctOccupMgmtProf      <dbl> 50.73, 37.64, 32.48, 22.71, 27.80, 23.42...
## $ MalePctDivorce        <dbl> 3.67, 4.23, 10.10, 10.98, 7.51, 11.40, 5...
## $ MalePctNevMarr        <dbl> 26.38, 27.99, 25.78, 28.15, 50.66, 33.32...
## $ FemalePctDiv          <dbl> 5.22, 6.45, 14.76, 14.47, 11.64, 14.46, ...
## $ TotalPctDiv           <dbl> 4.47, 5.42, 12.55, 12.91, 9.73, 13.04, 7...
## $ PersPerFam            <dbl> 3.22, 3.11, 2.95, 2.98, 2.98, 2.89, 3.14...
## $ PctFam2Par            <dbl> 91.43, 86.91, 78.54, 64.02, 58.59, 71.94...
## $ PctKids2Par           <dbl> 90.17, 85.33, 78.85, 62.36, 55.20, 69.79...
## $ PctYoungKids2Par      <dbl> 95.78, 96.82, 92.37, 65.38, 66.51, 79.76...
## $ PctTeen2Par           <dbl> 95.81, 86.46, 75.72, 67.43, 79.17, 75.33...
## $ PctWorkMomYoungKids   <dbl> 44.56, 51.14, 66.08, 59.59, 61.22, 62.96...
## $ PctWorkMom            <dbl> 58.88, 62.43, 74.19, 70.27, 68.94, 70.52...
## $ NumKidsBornNeverMar   <dbl> 31, 43, 164, 561, 402, 1511, 263, 2368, ...
## $ PctKidsBornNeverMar   <dbl> 0.36, 0.24, 0.88, 3.84, 4.70, 1.58, 1.18...
## $ NumImmig              <dbl> 1277, 1920, 1468, 339, 196, 2091, 2637, ...
## $ PctImmigRecent        <dbl> 8.69, 5.21, 16.42, 13.86, 46.94, 21.33, ...
## $ PctImmigRec5          <dbl> 13.00, 8.65, 23.98, 13.86, 56.12, 30.56,...
## $ PctImmigRec8          <dbl> 20.99, 13.33, 32.08, 15.34, 67.86, 38.02...
## $ PctImmigRec10         <dbl> 30.93, 22.50, 35.63, 15.34, 69.90, 45.48...
## $ PctRecentImmig        <dbl> 0.93, 0.43, 0.82, 0.28, 0.82, 0.32, 1.05...
## $ PctRecImmig5          <dbl> 1.39, 0.72, 1.20, 0.28, 0.98, 0.45, 1.49...
## $ PctRecImmig8          <dbl> 2.24, 1.11, 1.61, 0.31, 1.18, 0.57, 2.20...
## $ PctRecImmig10         <dbl> 3.30, 1.87, 1.78, 0.31, 1.22, 0.68, 2.55...
## $ PctSpeakEnglOnly      <dbl> 85.68, 87.79, 93.11, 94.98, 94.64, 96.87...
## $ PctNotSpeakEnglWell   <dbl> 1.37, 1.81, 1.14, 0.56, 0.39, 0.60, 0.60...
## $ PctLargHouseFam       <dbl> 4.81, 4.25, 2.97, 3.93, 5.23, 3.08, 5.08...
## $ PctLargHouseOccup     <dbl> 4.17, 3.34, 2.05, 2.56, 3.11, 1.92, 3.46...
## $ PersPerOccupHous      <dbl> 2.99, 2.70, 2.42, 2.37, 2.35, 2.28, 2.55...
## $ PersPerOwnOccHous     <dbl> 3.00, 2.83, 2.69, 2.51, 2.55, 2.37, 2.89...
## $ PersPerRentOccHous    <dbl> 2.84, 1.96, 2.06, 2.20, 2.12, 2.16, 2.09...
## $ PctPersOwnOccup       <dbl> 91.46, 89.03, 64.18, 58.18, 58.13, 57.81...
## $ PctPersDenseHous      <dbl> 0.39, 1.01, 2.03, 1.21, 2.94, 2.11, 1.47...
## $ PctHousLess3BR        <dbl> 11.06, 23.60, 47.46, 45.66, 55.64, 53.19...
## $ MedNumBR              <dbl> 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2, 3...
## $ HousVacant            <dbl> 64, 240, 544, 669, 333, 5119, 566, 2051,...
## $ PctHousOccup          <dbl> 98.37, 97.15, 95.68, 91.19, 92.45, 91.81...
## $ PctHousOwnOcc         <dbl> 91.01, 84.88, 57.79, 54.89, 53.57, 55.50...
## $ PctVacantBoarded      <dbl> 3.12, 0.00, 0.92, 2.54, 3.90, 2.09, 1.41...
## $ PctVacMore6Mos        <dbl> 37.50, 18.33, 7.54, 57.85, 42.64, 26.22,...
## $ MedYrHousBuilt        <dbl> 1959, 1958, 1976, 1939, 1958, 1966, 1956...
## $ PctHousNoPhone        <dbl> 0.00, 0.31, 1.55, 7.00, 7.45, 6.13, 0.69...
## $ PctWOFullPlumb        <dbl> 0.28, 0.14, 0.12, 0.87, 0.82, 0.31, 0.28...
## $ OwnOccLowQuart        <dbl> 215900, 136300, 74700, 36400, 30600, 377...
## $ OwnOccMedVal          <dbl> 262600, 164200, 90400, 49600, 43200, 539...
## $ OwnOccHiQuart         <dbl> 326900, 199900, 112000, 66500, 59500, 73...
## $ OwnOccQrange          <dbl> 111000, 63600, 37300, 30100, 28900, 3540...
## $ RentLowQ              <dbl> 685, 467, 370, 195, 202, 215, 463, 186, ...
## $ RentMedian            <dbl> 1001, 560, 428, 250, 283, 280, 669, 253,...
## $ RentHighQ             <dbl> 1001, 672, 520, 309, 362, 349, 824, 325,...
## $ RentQrange            <dbl> 316, 205, 150, 114, 160, 134, 361, 139, ...
## $ MedRent               <dbl> 1001, 627, 484, 333, 332, 340, 736, 338,...
## $ MedRentPctHousInc     <dbl> 23.8, 27.6, 24.1, 28.7, 32.2, 26.4, 24.4...
## $ MedOwnCostPctInc      <dbl> 21.1, 20.7, 21.7, 20.6, 23.2, 17.3, 20.8...
## $ MedOwnCostPctIncNoMtg <dbl> 14.0, 12.5, 11.6, 14.5, 12.9, 11.7, 12.5...
## $ NumInShelters         <dbl> 11, 0, 16, 0, 2, 327, 0, 21, 125, 43, 1,...
## $ NumStreet             <dbl> 0, 0, 0, 0, 0, 4, 0, 0, 15, 4, 0, 49, 2,...
## $ PctForeignBorn        <dbl> 10.66, 8.30, 5.00, 2.04, 1.74, 1.49, 9.1...
## $ PctBornSameState      <dbl> 53.72, 77.17, 44.77, 88.71, 73.75, 64.35...
## $ PctSameHouse85        <dbl> 65.29, 71.27, 36.60, 56.70, 42.22, 42.29...
## $ PctSameCity85         <dbl> 78.09, 90.22, 61.26, 90.17, 60.34, 70.61...
## $ PctSameState85        <dbl> 89.14, 96.12, 82.85, 96.24, 89.02, 85.66...
## $ LandArea              <dbl> 6.5, 10.6, 10.6, 5.2, 11.5, 70.4, 10.9, ...
## $ PopDens               <dbl> 1845.9, 2186.7, 2780.9, 3217.7, 974.2, 1...
## $ PctUsePubTrans        <dbl> 9.63, 3.84, 4.37, 3.31, 0.38, 0.97, 9.62...
## $ LemasPctOfficDrugUn   <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00...
## $ murders               <dbl> 0, 0, 3, 0, 0, 7, 0, 8, 0, 29, 1, 12, 3,...
## $ murdPerPop            <dbl> 0.00, 0.00, 8.30, 0.00, 0.00, 4.63, 0.00...
## $ rapes                 <dbl> 0, 1, 6, 10, NA, 77, 4, 34, 35, 141, 29,...
## $ rapesPerPop           <dbl> 0.00, 4.25, 16.60, 57.86, NA, 50.98, 13....
## $ robberies             <dbl> 1, 5, 56, 10, 4, 136, 9, 98, 16, 453, 71...
## $ robbbPerPop           <dbl> 8.20, 21.26, 154.95, 57.86, 32.04, 90.05...
## $ assaults              <dbl> 4, 24, 14, 33, 14, 449, 54, 128, 41, 104...
## $ assaultPerPop         <dbl> 32.81, 102.05, 38.74, 190.93, 112.14, 29...
## $ burglaries            <dbl> 14, 57, 274, 225, 91, 2094, 110, 608, 42...
## $ burglPerPop           <dbl> 114.85, 242.37, 758.14, 1301.78, 728.93,...
## $ larcenies             <dbl> 138, 376, 1797, 716, 1060, 7690, 288, 22...
## $ larcPerPop            <dbl> 1132.08, 1598.78, 4972.19, 4142.56, 8490...
## $ autoTheft             <dbl> 16, 26, 136, 47, 91, 454, 144, 125, 206,...
## $ autoTheftPerPop       <dbl> 131.26, 110.55, 376.30, 271.93, 728.93, ...
## $ arsons                <dbl> 2, 1, 22, NA, 5, 134, 17, 9, 8, 18, 6, 2...
## $ arsonsPerPop          <dbl> 16.41, 4.25, 60.87, NA, 40.05, 88.72, 57...
## $ ViolentCrimesPerPop   <dbl> 41.02, 127.56, 218.59, 306.64, NA, 442.9...
## $ nonViolPerPop         <dbl> 1394.59, 1955.95, 6167.51, NA, 9988.79, ...

The new data now has 125 variables. 23 variables were dropped.

We will work with the pre-processed dataset crime_new