# Read the data
#' Count homicide records with a given cause
#'
#' Reads the records file, extracts the `<dd>Cause: ...</dd>` field from every
#' line that has one, and counts the records whose cause matches `cause`,
#' ignoring case.
#'
#' @param cause A single cause of death, e.g. `"shooting"`. Must not be `NULL`.
#' @param file Path to the records file (one record per line).
#' @return The number of records whose cause field matches `cause`.
#'   Signals an error if `cause` is `NULL` or does not occur in the file.
count <- function(cause = NULL, file = "homicides.txt") {
  # `cause %in% NULL` evaluates to logical(0) when `cause` is NULL, which makes
  # `if` fail with an unrelated "argument is of length zero" error; is.null()
  # is the correct test. Validate before reading so a bad call fails fast.
  if (is.null(cause)) {
    stop("The cause should be non-NULL", call. = FALSE)
  }

  homicides <- readLines(file)
  r <- regexpr("<dd>[Cc]ause(.*?)</dd>", homicides)
  m <- regmatches(homicides, r)
  # Strip the surrounding markup, leaving only the cause text.
  all_cause <- gsub("<dd>[Cc]ause: |</dd>", "", m)

  # The count below ignores case, so the existence check has to as well;
  # otherwise a cause recorded only as "Shooting" would reject "shooting".
  if (!(tolower(cause) %in% tolower(all_cause))) {
    stop("The cause does not exist", call. = FALSE)
  }

  length(grep(cause, all_cause, ignore.case = TRUE))
}
# Example: count the records whose cause field matches "shooting".
count(cause = "shooting")
## [1] 1003
#' Count homicide records for victims of a given age
#'
#' Reads the records file, locates the `<dd>... years old</dd>` field on every
#' line that has one, extracts the number directly preceding "years old", and
#' counts the records where that number equals `age`.
#'
#' @param age The age (in years) to count. Must not be `NULL`.
#' @param file Path to the records file (one record per line).
#' @return The number of records whose extracted age equals `age`.
#'   Signals an error if `age` is `NULL`.
agecount <- function(age = NULL, file = "homicides.txt") {
  # `age %in% NULL` evaluates to logical(0) when `age` is NULL, which makes
  # `if` fail with an unrelated "argument is of length zero" error; is.null()
  # is the correct test. Validate before reading so a bad call fails fast.
  if (is.null(age)) {
    stop("The age should be non-NULL", call. = FALSE)
  }

  homicides <- readLines(file)
  r <- regexpr("<dd>(.*?)years old</dd>", homicides)
  m <- regmatches(homicides, r)

  # Take the whole digit run closest to "years old" rather than the first two
  # digits in the match: this keeps single-digit and 100+ ages intact and
  # ignores any unrelated number earlier in the matched text.
  age_field <- regmatches(m, regexpr("[0-9]+[^0-9]*years old</dd>$", m))
  all_age <- as.numeric(sub("[^0-9].*$", "", age_field))

  sum(all_age == age)
}
# Example: count the records whose extracted age is 21.
agecount(age = 21)
## [1] 60
# all_age<- sub(": | years old</dd>", "", m)