Creating New Variables

Import Data

# Setup steps below are commented out; run them once if
# ./data/restData.csv does not exist yet.
#
# Create a folder for the data:
#if(!file.exists("./data")){dir.create("./data")}
#
# Get the data from the web:
#fileUrl <-"https://data.baltimorecity.gov/api/views/k5ry-ef3g/rows.csv?accessType=DOWNLOAD"
#download.file(fileUrl, destfile = "./data/restData.csv")

# Read the locally saved CSV into a data frame.
restData <- read.csv(file = "./data/restData.csv")

### Create Sequences to Index Operations

# Create different types of sequences.

# Fixed step: start at 1 and advance by 2 without going past 10.
s1 <- seq(1, 10, by = 2)
s1
## [1] 1 3 5 7 9

# Fixed number of points: 3 evenly spaced values from 1 to 10.
# The argument name is spelled out in full (length.out) instead of relying
# on partial matching of "length".
s2 <- seq(1, 10, length.out = 3)
s2
## [1]  1.0  5.5 10.0

# Index sequence to loop over the data: one index per element of x.
# seq_along() replaces seq(along = x), which depended on partial matching
# of the along.with argument.
x <- c(1, 3, 8, 25, 100)
seq_along(x)
## [1] 1 2 3 4 5

### Create a New Variable

# Flag the rows whose neighborhood is one of the two listed below.
# %in% returns a logical vector with one value per row.

restData[["nearMe"]] <- restData[["neighborhood"]] %in% c("Roland Park", "Homeland")

# Count how many rows fall on each side of the flag.
table(restData[["nearMe"]])
## 
## FALSE  TRUE 
##  1314    13

### Create Binary Variables

# Derive a TRUE/FALSE flag marking rows whose zip code is negative.

restData$ZipWrong <- ifelse(
  test = restData$zipCode < 0,
  yes = TRUE,
  no = FALSE
)

# Cross-tabulate the new flag against the condition it was built from.
table(restData$ZipWrong, restData$zipCode < 0)
##        
##         FALSE TRUE
##   FALSE  1326    0
##   TRUE      0    1

Make Categorical Variables

# Base R approach: bin zipCode into groups bounded by its quantiles.
# cut() builds intervals that are open on the left by default, so the
# smallest value (which equals the first break) would fall outside every
# bin and silently become NA. include.lowest = TRUE closes the first
# interval on the left so every row is assigned to a group.
restData$zipGroups <- cut(
  restData$zipCode,
  breaks = quantile(restData$zipCode),
  include.lowest = TRUE
)

# Group sizes; with the minimum included, the counts add up to all rows.
table(restData$zipGroups)
## 
## [-2.123e+04,2.12e+04]  (2.12e+04,2.122e+04] (2.122e+04,2.123e+04] 
##                   338                   375                   282 
## (2.123e+04,2.129e+04] 
##                   332

# Cross-tabulate group against the raw zip code to see which codes land
# in which group.
table(restData$zipGroups, restData$zipCode)
##                        
##                         -21226 21201 21202 21205 21206 21207 21208 21209 21210
##   [-2.123e+04,2.12e+04]      1   136   201     0     0     0     0     0     0
##   (2.12e+04,2.122e+04]       0     0     0    27    30     4     1     8    23
##   (2.122e+04,2.123e+04]      0     0     0     0     0     0     0     0     0
##   (2.123e+04,2.129e+04]      0     0     0     0     0     0     0     0     0
##                        
##                         21211 21212 21213 21214 21215 21216 21217 21218 21220
##   [-2.123e+04,2.12e+04]     0     0     0     0     0     0     0     0     0
##   (2.12e+04,2.122e+04]     41    28    31    17    54    10    32    69     0
##   (2.122e+04,2.123e+04]     0     0     0     0     0     0     0     0     1
##   (2.123e+04,2.129e+04]     0     0     0     0     0     0     0     0     0
##                        
##                         21222 21223 21224 21225 21226 21227 21229 21230 21231
##   [-2.123e+04,2.12e+04]     0     0     0     0     0     0     0     0     0
##   (2.12e+04,2.122e+04]      0     0     0     0     0     0     0     0     0
##   (2.122e+04,2.123e+04]     7    56   199    19     0     0     0     0     0
##   (2.123e+04,2.129e+04]     0     0     0     0    18     4    13   156   127
##                        
##                         21234 21237 21239 21251 21287
##   [-2.123e+04,2.12e+04]     0     0     0     0     0
##   (2.12e+04,2.122e+04]      0     0     0     0     0
##   (2.122e+04,2.123e+04]     0     0     0     0     0
##   (2.123e+04,2.129e+04]     7     1     3     2     1
# Alternative using Hmisc: cut2() takes the number of quantile groups (g)
# directly and returns a factor.
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
restData[["zipGroup"]] <- cut2(restData[["zipCode"]], g = 4)

# Group sizes for the four cut2() bins.
table(restData[["zipGroup"]])
## 
## [-21226,21205) [ 21205,21220) [ 21220,21227) [ 21227,21287] 
##            338            375            300            314

Add the new variables to the Data Frame

library(Hmisc)
library(plyr)
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:Hmisc':
## 
##     is.discrete, summarize
# Build a new data frame from restData in which zipGroups holds the
# cut2() grouping of zipCode (replacing any existing zipGroups column).
restData2 <- mutate(
  restData,
  zipGroups = cut2(zipCode, g = 4)
)

# Group sizes in the new data frame.
table(restData2$zipGroups)
## 
## [-21226,21205) [ 21205,21220) [ 21220,21227) [ 21227,21287] 
##            338            375            300            314

# Preview the first rows, including the derived columns.
head(restData2)
##                    name zipCode neighborhood councilDistrict policeDistrict
## 1                   410   21206    Frankford               2   NORTHEASTERN
## 2                  1919   21231  Fells Point               1   SOUTHEASTERN
## 3                 SAUTE   21224       Canton               1   SOUTHEASTERN
## 4    #1 CHINESE KITCHEN   21211      Hampden              14       NORTHERN
## 5 #1 chinese restaurant   21223     Millhill               9   SOUTHWESTERN
## 6             19TH HOLE   21218 Clifton Park              14   NORTHEASTERN
##                          Location.1 X2010.Census.Neighborhoods
## 1   4509 BELAIR ROAD\nBaltimore, MD                         NA
## 2      1919 FLEET ST\nBaltimore, MD                         NA
## 3     2844 HUDSON ST\nBaltimore, MD                         NA
## 4    3998 ROLAND AVE\nBaltimore, MD                         NA
## 5 2481 frederick ave\nBaltimore, MD                         NA
## 6    2722 HARFORD RD\nBaltimore, MD                         NA
##   X2010.Census.Wards.Precincts Zip.Codes nearMe ZipWrong      zipGroups
## 1                           NA        NA  FALSE    FALSE [ 21205,21220)
## 2                           NA        NA  FALSE    FALSE [ 21227,21287]
## 3                           NA        NA  FALSE    FALSE [ 21220,21227)
## 4                           NA        NA  FALSE    FALSE [ 21205,21220)
## 5                           NA        NA  FALSE    FALSE [ 21220,21227)
## 6                           NA        NA  FALSE    FALSE [ 21205,21220)
##         zipGroup
## 1 [ 21205,21220)
## 2 [ 21227,21287]
## 3 [ 21220,21227)
## 4 [ 21205,21220)
## 5 [ 21220,21227)
## 6 [ 21205,21220)

This is an R Markdown document; feel free to reach out for finer details.