# Example vector; note the repeated "Apple" and the lowercase "banana".
fruits <- c(
  "Apple", "Banana", "Orange", "Grape", "Pineapple", "Kiwi", "Peach",
  "Mango", "Strawberry", "Guava", "Cherry", "Apple", "banana"
)
fruits == "Banana" # exact, case-sensitive match
## [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
which(fruits == "Banana") # which() turns the logical mask into positions
## [1] 2
breakfast <- c("Apple", "Banana", "Apple", "banana")
fruits %in% breakfast # is each fruit present anywhere in breakfast?
## [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
## [13] TRUE
match(breakfast, fruits) # notice that only the first match is returned
## [1] 1 2 1 13
fruits == "bana" # == never matches a substring; how do we search for a pattern?
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE
grep: Identifying strings that match a particular criterion
# Case-insensitive pattern search; value = TRUE returns the matching strings
# themselves rather than their positions.
grep("bana", fruits, ignore.case = TRUE, value = TRUE)
## [1] "Banana" "banana"
gsub: Replacing instances of a string with another of your choosing
gsub("Ch", "B", fruits[11]) # fruits[11] is "Cherry"
## [1] "Berry"
breakfast.ab <- c("App", "bana")
pmatch(breakfast.ab, fruits) # "App" is ambiguous (two "Apple" entries), so it yields NA
## [1] NA 13
grep("nana", fruits) # grep works, but only for one pattern at a time
## [1] 2 13
gregexpr: Identifying positions of a string of interest
# Locate every "a"/"A" in each fruit name. The result is a list with one
# element per string; -1 marks a string with no match. The outer parentheses
# print the value as it is assigned.
(positions_a <- gregexpr("a", fruits, ignore.case = TRUE))
## [[1]]
## [1] 1
## attr(,"match.length")
## [1] 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[2]]
## [1] 2 4 6
## attr(,"match.length")
## [1] 1 1 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[3]]
## [1] 3
## attr(,"match.length")
## [1] 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[4]]
## [1] 3
## attr(,"match.length")
## [1] 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[5]]
## [1] 5
## attr(,"match.length")
## [1] 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[6]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[7]]
## [1] 3
## attr(,"match.length")
## [1] 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[8]]
## [1] 2
## attr(,"match.length")
## [1] 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[9]]
## [1] 4
## attr(,"match.length")
## [1] 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[10]]
## [1] 3 5
## attr(,"match.length")
## [1] 1 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[11]]
## [1] -1
## attr(,"match.length")
## [1] -1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[12]]
## [1] 1
## attr(,"match.length")
## [1] 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
##
## [[13]]
## [1] 2 4 6
## attr(,"match.length")
## [1] 1 1 1
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
positions_a
### Paste
Use separators to combine strings with other characters or numbers
# sep is placed between the pieces of each element-wise pairing.
paste("X", seq_len(5), sep = ".")
## [1] "X.1" "X.2" "X.3" "X.4" "X.5"
Use collapse to join the resulting strings into a single string
# The five "X.n" strings are built first, then joined with collapse = "".
paste("X", seq_len(5), sep = ".", collapse = "")
## [1] "X.1X.2X.3X.4X.5"
paste0 is the same as paste with an empty separator
# Equivalent to paste("X", 1:5, sep = "").
paste0("X", seq_len(5))
## [1] "X1" "X2" "X3" "X4" "X5"
NOTE: paste0 does not have a "sep" option that can be modified by the user; a sep argument is treated as just another string to paste, so:
paste0("a", "b", sep = "c") == paste0("a", "b", "c")
paste0(rep(c("A","C","G","T"), each=4), c("A","C","G","T"), collapse = "")
Changing the case of strings
# Mixed-case sample used to demonstrate the case-conversion helpers.
string1 <- "Data Science"
tolower(string1) # every letter to lower case
## [1] "data science"
toupper(string1) # every letter to upper case
## [1] "DATA SCIENCE"
Create this string ‘A&1B&2C&3’ using a paste function
# Pair each letter with its index using "&", then join the pairs together.
paste(LETTERS[1:3], seq_len(3), sep = "&", collapse = "")
## [1] "A&1B&2C&3"
# Dates stored as plain text in month/day/year order.
dates <- c("11/14/2011", "12/04/2012", "03/01/2013", "02/09/2019")
class(dates)
## [1] "character"
# Parse the text into Date objects by describing its layout.
real_dates <- as.Date(dates, format = "%m/%d/%Y")
class(real_dates)
## [1] "Date"
# format() turns Dates back into text: weekday, month name, day, year.
other_format <- format(real_dates, "%A %B %d, %Y")
class(other_format)
## [1] "character"
For the codes used to identify and format dates:
# Open the help page that lists the % conversion codes.
help("strptime")
## starting httpd help server ... done
Identifying how long ago dates occurred
# Current date; the figures printed below reflect the day this was run.
today <- Sys.Date()
# Subtracting Dates yields a difftime; the parentheses print it on assignment.
(dif <- today - real_dates)
## Time differences in days
## [1] 4111 3725 3638 1467
class(dif)
## [1] "difftime"
To compute a time difference in a particular unit of interest, use difftime
# Same differences as above, expressed in hours instead of days.
difftime(time1 = today, time2 = real_dates, units = "hours")
## Time differences in hours
## [1] 98664 89400 87312 35208
The lubridate package contains a powerful set of tools that can be used to extract and interact with dates.
There are functions like mdy that allow for simpler extraction of date information from strings.
# install.packages("lubridate")  # run once if the package is not installed
library(lubridate, quietly = TRUE)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# mdy() parses month-day-year text; the parentheses print the result.
(lubri_dates <- mdy(dates))
## [1] "2011-11-14" "2012-12-04" "2013-03-01" "2019-02-09"
Extracting specific date information from a date object
# Each accessor returns one numeric component per date in lubri_dates.
year(lubri_dates)
## [1] 2011 2012 2013 2019
month(lubri_dates)
## [1] 11 12 3 2
day(lubri_dates)
## [1] 14 4 1 9