Simpsons

Load packages

Load data

# Point the session at the folder holding the data, then read the CSV into
# `simpson` and display it.
# NOTE(review): the absolute path only exists on the author's machine; consider
# keeping the CSV next to this document and dropping setwd() — confirm nothing
# later relies on the working directory first.
setwd("C:/Users/cassandra/Desktop/607")
simpson <- read.csv(file = "simpson.csv")
simpson

##                     name        phone
## 1            Moe Szyslak     555-1239
## 2 Burns, C. Montogomery      555-0113
## 3   Rev. Timothy Lovejoy     555-6542
## 4           Ned Flanders     555-8904
## 5         Simpson, Homer 636-555-3226
## 6     Dr. Julius Hibbert      5553642

Change Name column to character

# Store the name column as plain character strings so individual entries can
# be overwritten with arbitrary text in the following steps.
simpson[["name"]] <- as.character(simpson[["name"]])

Change the order of the names

# Lookup of name pattern -> rewritten name. Each pattern is used as a
# case-insensitive regular expression by grepl(); the replacement puts the
# given name(s) first and joins them to the surname with "_". The entries are
# applied in the order listed.
renames <- c(
  "Burns, C. Montogomery" = "Montogomery C._Burns",
  "Moe Szyslak"           = "Moe_Szyslak",
  "Rev. Timothy Lovejoy"  = "Rev.Timothy_Lovejoy",
  "Ned Flanders"          = "Ned_Flanders",
  "Simpson, Homer"        = "Homer_Simpson",
  "Dr. Julius Hibbert"    = "Dr.Julius_Hibbert"
)
for (pattern in names(renames)) {
  hits <- grepl(pattern, simpson$name, ignore.case = TRUE)
  simpson$name[hits] <- renames[[pattern]]
}
simpson

##                   name        phone
## 1          Moe_Szyslak     555-1239
## 2 Montogomery C._Burns     555-0113
## 3  Rev.Timothy_Lovejoy     555-6542
## 4         Ned_Flanders     555-8904
## 5        Homer_Simpson 636-555-3226
## 6    Dr.Julius_Hibbert      5553642

Create first and last name column

# Split `name` at the underscore into `firstname` and `lastname`; the other
# columns are carried over.
# NOTE(review): the result is stored as `names`, which masks base::names() for
# variable lookups — consider a more specific name if nothing depends on it.
names <- separate(simpson, name, into = c("firstname", "lastname"), sep = "_")
names

##        firstname lastname        phone
## 1            Moe  Szyslak     555-1239
## 2 Montogomery C.    Burns     555-0113
## 3    Rev.Timothy  Lovejoy     555-6542
## 4            Ned Flanders     555-8904
## 5          Homer  Simpson 636-555-3226
## 6      Dr.Julius  Hibbert      5553642

Construct a logical vector indicating whether a character has a title

# Flag the names that carry a title ("Rev." or "Dr."), giving one TRUE/FALSE
# per row of `simpson`.
# A single alternation pattern is used so that every name is tested against
# both titles. Passing the pattern vector c("Rev", "Dr") instead pairs
# patterns with names position-by-position (and recent stringr versions
# reject the length mismatch), so it only appears to work for this row order.
title <- str_detect(simpson$name, "\\b(Rev|Dr)\\.")
title

## [1] FALSE FALSE  TRUE FALSE FALSE  TRUE

Describe the types of strings that conform to the following regular expressions and construct an example that is matched by the regular expression.

# [0-9]+\\$ # Matches one or more digits immediately followed by a literal
# dollar sign (the "$" is escaped, so it is not an end-of-string anchor).
# Example: the "100$" in "price: 100$"; "100 dollars" contains no match.
# The pattern is applied to example strings rather than to the `simpson` data
# frame, which stringr would first collapse into one deparsed string per column.
str_count(c("price: 100$", "100 dollars"), "[0-9]+\\$")

## [1] 1 0

# \\b[a-z]{1,4}\\b # Matches a whole word made up of one to four lowercase
# letters, with a word boundary on each side.
# Example: "tiny" in "A tiny example" -- "A" is uppercase and "example" has
# more than four letters, so neither of those words matches.
# The pattern is applied to an example string rather than to the `simpson`
# data frame, where it only picked up the "c" of the deparsed "c(...)".
str_extract("A tiny example", "\\b[a-z]{1,4}\\b")

## [1] "tiny"

# .*?\\.txt$ # Matches a string that ends in the literal extension ".txt";
# the lazy ".*?" takes whatever precedes the extension (possibly nothing) and
# "$" anchors the match to the end of the string.
# Example: "notes.txt".
str_extract("notes.txt", ".*?\\.txt$")

## [1] "notes.txt"

# \\d{2}/\\d{2}/\\d{4} # Matches two digits, a slash, two digits, a slash and
# then four digits -- for instance a date written as mm/dd/yyyy or dd/mm/yyyy.
# Example: "12/31/2020".

str_extract_all("Due 12/31/2020 or 01/15/2021", "\\d{2}/\\d{2}/\\d{4}")

## [[1]]
## [1] "12/31/2020" "01/15/2021"

# <(.+?)>.+?</\\1> # Matches an opening tag, at least one character of
# content, and a closing tag with the same name; "\\1" is a backreference to
# the tag name captured by "(.+?)".
# Example: "<b>bold</b>".

str_extract("<b>bold</b> text", "<(.+?)>.+?</\\1>")

## [1] "<b>bold</b>"