# Extract-or-replace-substrings-in-a-character-vector-substr-substring-strsplit.R

# Signatures of the functions demonstrated in this script:
#   substr(x, start, stop)
#   substring(text, first, last = 1000000L)
#   strsplit(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE)

# Take characters 2 through 4 of a single string.
substr("abcdef", start = 2, stop = 4)

## [1] "bcd"

# Vector-valued bounds: one piece per (first, last) pair, here six
# single-character pieces.
substring("abcdef", first = 1:6, last = 1:6)

## [1] "a" "b" "c" "d" "e" "f"

## strsplit() is more efficient ...
########################################
# The input: four copies of the same string.
rep("abcdef", 4)

## [1] "abcdef" "abcdef" "abcdef" "abcdef"

# The (start, stop) pairs used below; the shorter 4:5 is recycled to length 4.
data.frame(1:4, 4:5)

##   X1.4 X4.5
## 1    1    4
## 2    2    5
## 3    3    4
## 4    4    5

# substr() applies one (start, stop) pair per element of the input vector.
substr(rep("abcdef", 4), start = 1:4, stop = 4:5)

## [1] "abcd" "bcde" "cd"   "de"

########################################
# A small unnamed character vector with strings of different lengths.
x <- c(
  "asfef",
  "qwerty",
  "yuiop[",
  "b",
  "stuff.blah.yech"
)
x

## [1] "asfef"           "qwerty"          "yuiop["          "b"              
## [5] "stuff.blah.yech"

# Same bounds for every element; strings shorter than `start` give "".
substr(x, start = 2, stop = 6)

## [1] "sfef"  "werty" "uiop[" ""      "tuff."

# `last` is recycled over the elements of x (4, 5, 6, 4, 5).
substring(x, first = 2, last = 4:6)

## [1] "sfe"   "wert"  "uiop[" ""      "tuff"

#############?substr
# Same strings as above, but the first two elements now carry names.
x <- c(
  as = "asfef",
  qu = "qwerty",
  "yuiop[",
  "b",
  "stuff.blah.yech"
)
x

##                as                qu                                     
##           "asfef"          "qwerty"          "yuiop["               "b" 
##                   
## "stuff.blah.yech"

# Split each element of x at every "e"; the result is a list with one
# character vector per input element, keeping the names of x.
strsplit(x, split = "e")

## $as
## [1] "asf" "f"  
## 
## $qu
## [1] "qw"  "rty"
## 
## [[3]]
## [1] "yuiop["
## 
## [[4]]
## [1] "b"
## 
## [[5]]
## [1] "stuff.blah.y" "ch"

# Flatten the list of pieces into a single character vector.
unlist(strsplit(x, split = "e"))

##            as1            as2            qu1            qu2                
##          "asf"            "f"           "qw"          "rty"       "yuiop[" 
##                                              
##            "b" "stuff.blah.y"           "ch"

# Pitfall: `split` is a regular expression, so "." matches every character
# and only empty pieces are left.
unlist(strsplit("a.b.c", split = "."))

## [1] "" "" "" "" ""

# To split on a literal '.', either put it in a character class ...
unlist(strsplit("a.b.c", split = "[.]"))

## [1] "a" "b" "c"

# ... or switch off regular-expression matching altogether.
unlist(strsplit("a.b.c", split = ".", fixed = TRUE))

## [1] "a" "b" "c"

## a useful function: rev() for strings
# Reverse the characters of every string in a character vector.
#
# x: a character vector.
# Returns a character vector with one reversed string per element of x
# (names of x, if any, are kept).
#
# vapply() is used instead of sapply() so the result is always a character
# vector; sapply() would return an empty list for zero-length input.
strReverse <- function(x) {
  chars <- strsplit(x, NULL)  # split = NULL breaks into single characters
  vapply(lapply(chars, rev), paste, character(1), collapse = "")
}
strReverse(c("abc", "Statistics"))

## [1] "cba"        "scitsitatS"

## get the first names of the members of R-core
# Drop the first 8 lines of the AUTHORS file (assumed to be its header, so
# that the member list starts on line 9 -- check this against the installed
# file), then keep only the lines before the first blank one.  Dropping a
# fixed number of trailing lines is not enough: the footer that follows the
# member list can be longer than that, and its lines would then be treated
# as names.  If there is no blank line, every remaining line is kept.
a <- readLines(file.path(R.home("doc"), "AUTHORS"))[-(1:8)]
a <- a[seq_len(match("", trimws(a), nomatch = length(a) + 1L) - 1L)]
a

##  [1] "Douglas Bates"      "John Chambers"      "Peter Dalgaard"    
##  [4] "Robert Gentleman"   "Kurt Hornik"        "Ross Ihaka"        
##  [7] "Tomas Kalibera"     "Michael Lawrence"   "Friedrich Leisch"  
## [10] "Uwe Ligges"         "Thomas Lumley"      "Martin Maechler"   
## [13] "Sebastian Meyer"    "Paul Murrell"       "Martyn Plummer"    
## [16] "Brian Ripley"       "Deepayan Sarkar"    "Duncan Temple Lang"
## [19] "Luke Tierney"       "Simon Urbanek"

# Keep only the text before the first space, i.e. the first name.
(a <- sub(" .*", "", a))

##  [1] "Douglas"   "John"      "Peter"     "Robert"    "Kurt"      "Ross"     
##  [7] "Tomas"     "Michael"   "Friedrich" "Uwe"       "Thomas"    "Martin"   
## [13] "Sebastian" "Paul"      "Martyn"    "Brian"     "Deepayan"  "Duncan"   
## [19] "Luke"      "Simon"

# and reverse them
strReverse(a)

##  [1] "salguoD"   "nhoJ"      "reteP"     "treboR"    "truK"      "ssoR"     
##  [7] "samoT"     "leahciM"   "hcirdeirF" "ewU"       "samohT"    "nitraM"   
## [13] "naitsabeS" "luaP"      "nytraM"    "nairB"     "nayapeeD"  "nacnuD"   
## [19] "ekuL"      "nomiS"

# Extract-or-replace-substrings-in-a-character-vector-substr-substring-strsplit.R

# liyix

# 2022-01-03