Automated Data Collection in R, Chapter 8, Problems 3, 7, 8, 9.

Load Packages

library(stringr)
library(XML)
library(RCurl)
## Loading required package: bitops

#3

Determine if the name has a title.

Look for 2 or more letters followed by a period.

# Flag each name that contains a run of two or more letters
# immediately followed by a period (the pattern used here for a title).
hastitle <- str_detect(string = name, pattern = "[[:alpha:]]{2,}\\.")
hastitle
## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

Determine if the name has two first names.

Look for 1 uppercase letter followed by a period.

# Flag each name that contains an initial: exactly one uppercase letter
# immediately followed by a period.
# Limitation: an additional first name that is written without a period
# is not detected by this pattern.
multiplenames <- str_detect(string = name, pattern = "[[:upper:]]{1}\\.")
multiplenames
## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

#7

Extract first html tag only.

The original expression failed because its match was greedy: it extracted the longest possible match instead of stopping at the end of the first tag. To get the desired result, we can restrict the characters between the angle brackets to letters (alpha) only.

# Sample markup from which only the opening tag should be returned.
astring <- "<title>++BREAKING NEWS+++</title>"
# Only letters are allowed between "<" and ">", so the match has to end
# at the first ">" and cannot run on to the closing tag.
firsttag <- str_extract(string = astring, pattern = "<[[:alpha:]]+>")
firsttag
## [1] "<title>"

#8

Extract binomial formula.

The original expression failed because it did not capture the caret or the minus sign.

# Input: a binomial expansion followed by explanatory text.
bstring <- "(5-3)^2=5^2-2*5*3+3^2 conforms to the binomial theorem"
# The character class lists every symbol that may occur in the formula,
# each exactly once: digits, "=", "+", "*", parentheses, and the escaped
# "^" and "-". The first run of such characters is the formula; the space
# that follows it is not in the class and therefore ends the match.
bformula <- str_extract(bstring, "[0-9=+*()\\^\\-]+")
bformula
## [1] "(5-3)^2=5^2-2*5*3+3^2"

#9

Extract secret message.

# Encoded text: the message is carried by the uppercase letters and the
# punctuation marks; the lowercase letters and digits are discarded below.
secretmessage <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Keep uppercase letters and every punctuation mark, so that the closing "!"
# is retained together with the "." characters that separate the words.
decode <- unlist(str_extract_all(secretmessage, "[[:upper:][:punct:]]"))
# Glue the extracted characters back together into a single string.
combined <- str_c(decode, collapse = "")
# Periods stand in for spaces; fixed() matches "." literally, so no regex
# escaping is needed.
answer <- str_replace_all(combined, fixed("."), " ")
cat(answer)
## CONGRATULATIONS YOU ARE A SUPERNERD!
# Append a personal sign-off to the decoded message.
justforme <- str_c(answer, " ARMENOUSH")
cat(justforme)
## CONGRATULATIONS YOU ARE A SUPERNERD! ARMENOUSH