# Run these R code chunks, as needed, to clean text variables.
# Create a folder for the downloaded data (skipped when it already exists).
if (!dir.exists("./data")) {
  dir.create("./data")
}

# Get the data from the web: CSV export of the camera data set.
# The query parameter is spelled `accessType`.
fileUrl <- "https://data.baltimorecity.gov/api/views/dz54-2aru/rows.csv?accessType=DOWNLOAD"
download.file(fileUrl, destfile = "./data/cameras.csv")

# Load the downloaded CSV into a data frame and list its column names.
CameraData <- read.csv("./data/cameras.csv")
names(CameraData)
## [1] "address" "direction"
## [3] "street" "crossStreet"
## [5] "intersection" "Location.1"
## [7] "X2010.Census.Neighborhoods" "X2010.Census.Wards.Precincts"
## [9] "Zip.Codes"
# Lowercase every column name. The result is only printed here;
# CameraData itself is left unchanged.
tolower(colnames(CameraData))
## [1] "address" "direction"
## [3] "street" "crossstreet"
## [5] "intersection" "location.1"
## [7] "x2010.census.neighborhoods" "x2010.census.wards.precincts"
## [9] "zip.codes"
# Break each column name into pieces at every literal "." character.
# The result is a list with one character vector per column name.
splitNames <- strsplit(colnames(CameraData), split = ".", fixed = TRUE)
# Inspect the pieces of the sixth name ("Location.1").
splitNames[[6]]
## [1] "Location" "1"
# Helper: return the first element of `x`
# (for a split column name, the piece before the first ".").
firstElement <- function(x) {
  x[1L]
}
# Keep only the first piece of every split name. vapply() checks that each
# result is a single string and always returns a character vector, whereas
# sapply() would return an empty list if splitNames were empty.
vapply(splitNames, firstElement, character(1))
## [1] "address" "direction" "street" "crossStreet" "intersection"
## [6] "Location" "X2010" "X2010" "Zip"
# Read the data: download the reviews and solutions CSV files.
if (!dir.exists("./data")) {
  dir.create("./data")
}

# Each URL is kept on a single line: a line break before the closing quote
# would become part of the string and therefore part of the requested URL.
fileUrl1 <- "https://raw.githubusercontent.com/DataScienceSpecialization/courses/master/03_GettingData/04_01_editingTextVariables/data/reviews.csv"
fileUrl2 <- "https://raw.githubusercontent.com/DataScienceSpecialization/courses/master/03_GettingData/04_01_editingTextVariables/data/solutions.csv"

download.file(fileUrl1, destfile = "./data/reviews.csv")
download.file(fileUrl2, destfile = "./data/solutions.csv")

# Load both files into data frames.
reviews <- read.csv("./data/reviews.csv")
solutions <- read.csv("./data/solutions.csv")
# Show the first two rows of the reviews data.
head(reviews, n = 2)
## id solution_id reviewer_id start stop time_left accept
## 1 1 3 27 1304095698 1304095758 1754 1
## 2 2 4 22 1304095188 1304095206 2306 1
# List the column names.
names(reviews)
## [1] "id" "solution_id" "reviewer_id" "start" "stop"
## [6] "time_left" "accept"
# Remove the first underscore from each column name. sub() replaces only the
# first match per string; "_" is matched as a literal character.
sub("_", "", names(reviews), fixed = TRUE)
## [1] "id" "solutionid" "reviewerid" "start" "stop"
## [6] "timeleft" "accept"
# A string with several underscores, used to contrast sub() and gsub().
testName <- "I_am_Wonder_Woman"
# sub() removes only the first underscore.
sub("_", "", testName, fixed = TRUE)
## [1] "Iam_Wonder_Woman"
# gsub() removes all underscores.
gsub("_", "", testName, fixed = TRUE)
## [1] "IamWonderWoman"
# First, count how many intersections do and do not contain "Alameda".
table(grepl(pattern = "Alameda", x = CameraData[["intersection"]]))
##
## FALSE TRUE
##    77    3
# Then look up the row positions of the matches.
grep(pattern = "Alameda", x = CameraData[["intersection"]])
## [1] 65 69 79
# Subset: keep only the rows whose intersection does NOT contain "Alameda".
CameraData2 <- CameraData[
  !grepl(pattern = "Alameda", x = CameraData[["intersection"]]),
]
# Return the matching values themselves instead of their positions.
grep(pattern = "Alameda", x = CameraData[["intersection"]], value = TRUE)
## [1] "E 33rd & The Alameda" "The Alameda & 33rd St"
## [3] "Harford \n & The Alameda"
# Check whether a value exists by counting its matches.
length(grep(pattern = "JeffStreet", x = CameraData[["intersection"]]))
## [1] 0
# A result of 0 means no intersection contains "JeffStreet", i.e. the value does not exist.
library(stringr)
# Count the characters in a string (spaces included).
nchar("Linda Angulo Lopez")
## [1] 18
# Join strings; paste() separates the pieces with a single space.
paste("Linda", "Lopez", "!?that's not my name", sep = " ")
## [1] "Linda Lopez !?that's not my name"
# Extract characters 1 through 12 and join them to a prefix.
paste(
  "My name is",
  substr("Linda Angulo Lopez", start = 1, stop = 12),
  sep = " "
)
## [1] "My name is Linda Angulo"
# Join with no separator.
paste0("@", "lindangulopez")
## [1] "@lindangulopez"
# Strip excess whitespace from both ends of a string (str_trim() is from stringr).
str_trim(string = " Tweet me ", side = "both")
## [1] "Tweet me"
# Build a list with two named elements and one unnamed 5 x 5 matrix.
myList <- list(
  letters = c("a", "b", "c"),
  numbers = 1:3,
  matrix(1:25, ncol = 5)
)
# Inspect list elements: `$` extracts the element itself,
# while `[` returns a one-element list that keeps the name.
myList$letters
myList[1]
## [1] "a" "b" "c"
## $letters
## [1] "a" "b" "c"
myList$numbers
myList[2]
## [1] 1 2 3
## $numbers
## [1] 1 2 3
# The third element has no name, so it is reached by position.
myList[3]
## [[1]]
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    6   11   16   21
## [2,]    2    7   12   17   22
## [3,]    3    8   13   18   23
## [4,]    4    9   14   19   24
## [5,]    5   10   15   20   25
# Select one cell of the matrix: row 2, column 3.
myList[[3]][2, 3]
## [1] 12