Data 607 Assignment 3

Problem 3

Setup:

library(stringr)

# Raw phone-book style text in which names and phone numbers run together.
raw.data <- "555-1239Moe Szyslak(636) 555-0113Burns, C. Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson,Homer5553642Dr. Julius Hibbert"
# Pull out every run of two or more letters, periods, commas or spaces.
# str_extract_all() returns one list element per input string; raw.data is a
# single string, so the first element holds all of the extracted names.
name <- str_extract_all(
  string = raw.data,
  pattern = "[[:alpha:]., ]{2,}"
)[[1]]

Part 1:

# Put names into the standard "firstname lastname" order.
#
# Elements containing a comma are treated as "Last, First": the text before
# the first comma and the text between the first and second comma are swapped
# and joined with a single space. Whitespace around both parts is stripped, so
# "Burns, C. Montgomery" becomes "C. Montgomery Burns" with no stray leading
# space. Elements without a comma are returned unchanged.
#
# x:       character vector of names (a single string works as before).
# returns: character vector of the same length as x.
othellor <- function(x) {
  has_comma <- grepl(",", x, fixed = TRUE)
  if (!any(has_comma)) {
    return(x)
  }
  pieces <- strsplit(x[has_comma], ",", fixed = TRUE)
  x[has_comma] <- vapply(
    pieces,
    function(p) {
      last <- trimws(p[1])
      # strsplit() drops a trailing empty piece ("Burns," -> "Burns"), so
      # guard against a missing second part instead of pasting NA.
      first <- if (length(p) >= 2) trimws(p[2]) else ""
      trimws(paste(first, last))
    },
    character(1)
  )
  x
}
# Normalise every extracted name to "firstname lastname" order.
# vapply() calls othellor() once per element and returns a plain character
# vector (lapply() would return a list), so no hard-coded index range such as
# 1:6 is needed and the code keeps working if the number of names changes.
name2 <- vapply(name, othellor, character(1), USE.NAMES = FALSE)
# Swapping "Burns, C. Montgomery" can leave a space in front of the "C";
# base trimws() strips it, so the gdata package is not required.
name2 <- trimws(name2)
name2

## [1] "Moe Szyslak"          "C. Montgomery Burns"  "Rev. Timothy Lovejoy"
## [4] "Ned Flanders"         "Homer Simpson"        "Dr. Julius Hibbert"

Part 2:

# A title such as "Rev." or "Dr." shows up as one or more lowercase letters
# directly followed by a period; a bare initial like "C." does not qualify.
titles <- "[a-z]+\\."
str_detect(string = name, pattern = titles)

## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

Part 3:

# A second name is flagged by an initial: a single capital letter directly
# followed by a period, as in "C. Montgomery".
twoname <- "[A-Z]\\."
str_detect(string = name, pattern = twoname)

## [1] FALSE  TRUE FALSE FALSE FALSE FALSE

Problem 4:

# One or more digits immediately followed by a literal dollar sign,
# e.g. "123$".
cash <- "[0-9]+\\$"
str_detect(string = "123$", pattern = cash)

## [1] TRUE

# A standalone word (delimited by word boundaries) made of one to four
# lowercase letters: "a" and "azde" match, but "azdef" does not.
shortname <- "\\b[a-z]{1,4}\\b"
str_detect(string = "azde", pattern = shortname)

## [1] TRUE

str_detect(string = "azdef", pattern = shortname)

## [1] FALSE

# Any string that ends in ".txt", e.g. "misslucyhadasteamboat.txt".
textdoc <- ".*?\\.txt$"
str_detect(string = "misslucyhadasteamboat.txt", pattern = textdoc)

## [1] TRUE

# Anything shaped like a standard American date (two digits, slash, two
# digits, slash, four digits). The numbers do not have to correspond to a real
# month or day, so "33/44/0304" matches.
crazydate <- "\\d{2}/\\d{2}/\\d{4}"
str_detect(string = "33/44/0304", pattern = crazydate)

## [1] TRUE

# Detects a matching pair of opening and closing HTML tags such as
# "<em>emphasis</em>". The backreference \\1 forces the closing tag to repeat
# everything between "<" and ">" of the opening tag, so a tag carrying
# attributes, like <a href=""></a>, is not matched. The expression
# "<(.+?)\\s{0,1}.*?>.+?</\\1>" handles both links and plain tags.
tags <- "<(.+?)>.+?</\\1>"
str_detect(string = "<em>emphasis</em>", pattern = tags)

## [1] TRUE

# Improved version: after the captured tag name, allow an optional whitespace
# character and arbitrary attribute text before the closing ">".
supertags <- "<(.+?)\\s{0,1}.*?>.+?</\\1>"
str_detect(string = "<em>emphasis</em>", pattern = supertags)

## [1] TRUE

str_detect(string = '<a href="https://www.google.com">Google</a>', pattern = supertags)

## [1] TRUE

Problem 9

# Hidden-message puzzle: the message is spelled by the uppercase letters and
# punctuation buried among digits and lowercase filler.
secret <- "clcopCow1zmstc0d87wnkig7OvdicpNuggvhryn92Gjuwczi8hqrfpRxs5Aj5dwpn0TanwoUwisdij7Lj8kpf03AT5Idr3coc0bt7yczjatOaootj55t3Nj3ne6c4Sfek.r1w1YwwojigOd6vrfUrbz2.2bkAnbhzgv4R9i05zEcrop.wAgnb.SqoU65fPa1otfb7wEm24k6t3sR9zqe5fy89n6Nd5t9kc4fE905gmc4Rgxo5nhDk!gr"
# Step 1: drop every digit.
sec2 <- gsub(pattern = "\\d", replacement = "", x = secret)
# Step 2: drop every lowercase letter, leaving capitals and punctuation.
sec3 <- gsub(pattern = "[a-z]", replacement = "", x = sec2)
# Step 3: fix the formatting. The first period ends the opening word, so it
# becomes "! "; every remaining period is a word separator and becomes a space.
sec4 <- sub(pattern = "\\.", replacement = "! ", x = sec3)
sec5 <- gsub(pattern = "\\.", replacement = " ", x = sec4)
print(sec5)

## [1] "CONGRATULATIONS! YOU ARE A SUPERNERD!"

Data 607 Assignment 3

Jason Givens-Doyle

September 15, 2018

Problem 3

Setup:

Part 1:

Part 2:

Part 3:

Problem 4:

Problem 9