# Join several strings into one; a single space is the default separator,
# spelled out here for clarity.
paste("Hello", "Naimish", "Agarwal", sep = " ")
## [1] "Hello Naimish Agarwal"
# Any other separator is supplied through `sep`.
paste("Hello", "Naimish", "Agarwal", sep = "/")
## [1] "Hello/Naimish/Agarwal"
# paste() is vectorised: equal-length vectors are joined element by element.
x <- c("Hello", "Hi", "Holla")
y <- paste0("N", 1:3)
paste(x, y, sep = " ")
## [1] "Hello N1" "Hi N2" "Holla N3"
# A length-one argument is recycled against every element of y.
paste("Hello", y, sep = " ")
## [1] "Hello N1" "Hello N2" "Hello N3"
# Shorter vectors are recycled too: the two farewells repeat to cover three
# elements.
paste("Hello", y, c("Good Bye", "Bye"), sep = " ")
## [1] "Hello N1 Good Bye" "Hello N2 Bye" "Hello N3 Good Bye"
# Without `collapse` a character vector comes back element by element ...
y <- c("Hello", "Naimish", "Agarwal")
paste(y)
## [1] "Hello" "Naimish" "Agarwal"
# ... whereas `collapse` folds the elements into a single string.
paste(y, collapse = " ")
## [1] "Hello Naimish Agarwal"
# Build a sentence around a value held in a variable.
x <- "Naimish"
# The pieces carry their own spacing, so no separator is wanted: paste0() is
# the idiomatic spelling of paste(..., sep = "").
paste0("Hello ", x, " you are doing awesome! ", "Bye ")
## [1] "Hello Naimish you are doing awesome! Bye "
# sprintf() substitutes x for the %s placeholder in the format string.
sprintf("Hello %s, you are doing awesome!", x)
## [1] "Hello Naimish, you are doing awesome!"
library(XML)
# URL of the page whose table is read below.
congURL <- "http://www.loc.gov/rr/print/list/057_chron.html"
# Read the third table on that page into a data.frame, treating the first row
# as the header, skipping row 1 and keeping text columns as character.
usPresidents <- readHTMLTable(
  doc = congURL,
  which = 3,
  header = TRUE,
  skip.rows = 1,
  as.data.frame = TRUE,
  stringsAsFactors = FALSE
)
head(x = usPresidents)
## YEAR PRESIDENT
## 1 1789-1797 George Washington
## 2 1797-1801 John Adams
## 3 1801-1805 Thomas Jefferson
## 4 1805-1809 Thomas Jefferson
## 5 1809-1812 James Madison
## 6 1812-1813 James Madison
## FIRST LADY VICE PRESIDENT
## 1 Martha Washington John Adams
## 2 Abigail Adams Thomas Jefferson
## 3 Martha Wayles Skelton Jefferson\n (no image) Aaron Burr
## 4 Martha Wayles Skelton Jefferson\n (no image) George Clinton
## 5 Dolley Madison George Clinton
## 6 Dolley Madison office vacant
# Look at the last rows of the scraped table as well.
tail(x = usPresidents)
## YEAR
## 63 2001-2009
## 64 2009-
## 65 Presidents: Introduction (Rights/Ordering\n Info.) | Adams\n - Cleveland | Clinton - Harding Harrison\n - Jefferson | Johnson - McKinley | Monroe\n - Roosevelt | Taft - Truman | Tyler\n - WilsonList of names, Alphabetically
## 66 First Ladies: Introduction\n (Rights/Ordering Info.) | Adams\n - Coolidge | Eisenhower - HooverJackson\n - Pierce | \n Polk - Wilson | List\n of names, Alphabetically
## 67 Vice Presidents: Introduction (Rights/Ordering Info.) | Adams - Coolidge | Curtis - Hobart Humphrey - Rockefeller | Roosevelt - WilsonList of names, Alphabetically
## 68 Top\n of Page
## PRESIDENT FIRST LADY VICE PRESIDENT
## 63 George W. Bush Laura Bush Richard Cheney
## 64 Barack Obama Michelle Obama Joseph R. Biden
## 65 <NA> <NA> <NA>
## 66 <NA> <NA> <NA>
## 67 <NA> <NA> <NA>
## 68 <NA> <NA> <NA>
# Show only the YEAR column for the last rows.
tail(usPresidents[["YEAR"]])
## [1] "2001-2009"
## [2] "2009-"
## [3] "Presidents: Introduction (Rights/Ordering\n Info.) | Adams\n - Cleveland | Clinton - Harding Harrison\n - Jefferson | Johnson - McKinley | Monroe\n - Roosevelt | Taft - Truman | Tyler\n - WilsonList of names, Alphabetically"
## [4] "First Ladies: Introduction\n (Rights/Ordering Info.) | Adams\n - Coolidge | Eisenhower - HooverJackson\n - Pierce | \n Polk - Wilson | List\n of names, Alphabetically"
## [5] "Vice Presidents: Introduction (Rights/Ordering Info.) | Adams - Coolidge | Curtis - Hobart Humphrey - Rockefeller | Roosevelt - WilsonList of names, Alphabetically"
## [6] "Top\n of Page"
# Drop the footer rows picked up from the bottom of the table. Genuine records
# have a YEAR that starts with a four-digit year ("1789-1797", "1841",
# "2009-"), while the footer rows hold navigation text, so filter on that
# pattern rather than on a hard-coded row range. grepl() returns FALSE for NA,
# so rows with a missing YEAR are dropped as well.
usPresidents <- usPresidents[grepl("^[0-9]{4}", usPresidents$YEAR), ]
head(usPresidents)
## YEAR PRESIDENT
## 1 1789-1797 George Washington
## 2 1797-1801 John Adams
## 3 1801-1805 Thomas Jefferson
## 4 1805-1809 Thomas Jefferson
## 5 1809-1812 James Madison
## 6 1812-1813 James Madison
## FIRST LADY VICE PRESIDENT
## 1 Martha Washington John Adams
## 2 Abigail Adams Thomas Jefferson
## 3 Martha Wayles Skelton Jefferson\n (no image) Aaron Burr
## 4 Martha Wayles Skelton Jefferson\n (no image) George Clinton
## 5 Dolley Madison George Clinton
## 6 Dolley Madison office vacant
# Confirm that the table now ends with a real record.
tail(x = usPresidents)
## YEAR PRESIDENT FIRST LADY VICE PRESIDENT
## 59 1977-1981 Jimmy Carter Rosalynn Carter Walter F. Mondale
## 60 1981-1989 Ronald Reagan Nancy Reagan George Bush
## 61 1989-1993 George Bush Barbara Bush Dan Quayle
## 62 1993-2001 Bill Clinton Hillary Rodham Clinton Albert Gore
## 63 2001-2009 George W. Bush Laura Bush Richard Cheney
## 64 2009- Barack Obama Michelle Obama Joseph R. Biden
library(stringr)
# Split every YEAR entry at the hyphen; the result is a list with one character
# vector per row.
years <- str_split(usPresidents$YEAR, pattern = "-")
head(x = years)
## [[1]]
## [1] "1789" "1797"
##
## [[2]]
## [1] "1797" "1801"
##
## [[3]]
## [1] "1801" "1805"
##
## [[4]]
## [1] "1805" "1809"
##
## [[5]]
## [1] "1809" "1812"
##
## [[6]]
## [1] "1812" "1813"
library(plyr)
# Turn each split entry into one row with Begin and End columns; entries that
# have no second piece yield NA for End.
ldply(years, function(parts) c(Begin = parts[1], End = parts[2]))
## Begin End
## 1 1789 1797
## 2 1797 1801
## 3 1801 1805
## 4 1805 1809
## 5 1809 1812
## 6 1812 1813
## 7 1813 1814
## 8 1814 1817
## 9 1817 1825
## 10 1825 1829
## 11 1829 1832
## 12 1833 1837
## 13 1837 1841
## 14 1841 <NA>
## 15 1841 1845
## 16 1845 1849
## 17 1849 1850
## 18 1850 1853
## 19 1853 <NA>
## 20 1853 1857
## 21 1857 1861
## 22 1861 1865
## 23 1865 <NA>
## 24 1865 1869
## 25 1869 1873
## 26 1873 1875
## 27 1875 1877
## 28 1877 1881
## 29 1881 <NA>
## 30 1881 1885
## 31 1885 <NA>
## 32 1885 1889
## 33 1889 1893
## 34 1893 1897
## 35 1897 1899
## 36 1899 1901
## 37 1901 <NA>
## 38 1901 1905
## 39 1905 1909
## 40 1909 1912
## 41 1912 1913
## 42 1913 1921
## 43 1921 1923
## 44 1923 1925
## 45 1925 1929
## 46 1929 1933
## 47 1933 1941
## 48 1941 1945
## 49 1945 <NA>
## 50 1945 1949
## 51 1949 1953
## 52 1953 1961
## 53 1961 1963
## 54 1963 1965
## 55 1963 1969
## 56 1969 1973
## 57 1973 1974
## 58 1974 1977
## 59 1977 1981
## 60 1981 1989
## 61 1989 1993
## 62 1993 2001
## 63 2001 2009
## 64 2009
# Stack the split year vectors into a two-column table with a single rbind()
# call. Binding everything at once avoids growing the matrix one row at a time
# and leaves the matrix without row names, so data.frame() no longer warns
# about duplicated row.names. Entries with only one piece (e.g. "1841") are
# recycled by rbind() to fill both columns. stringsAsFactors = FALSE keeps the
# years as character on every R version.
yearMatrix <- data.frame(do.call(rbind, years), stringsAsFactors = FALSE)
head(yearMatrix)
## X1 X2
## 1 1789 1797
## 2 1797 1801
## 3 1801 1805
## 4 1805 1809
## 5 1809 1812
## 6 1812 1813
# Replace the automatic X1/X2 column names with descriptive ones.
colnames(yearMatrix) <- c("Begin", "End")
head(x = yearMatrix)
## Begin End
## 1 1789 1797
## 2 1797 1801
## 3 1801 1805
## 4 1805 1809
## 5 1809 1812
## 6 1812 1813
# Append the Begin/End columns to the right of the presidents table.
usPresidents <- cbind(usPresidents, yearMatrix)
head(x = usPresidents)
## YEAR PRESIDENT
## 1 1789-1797 George Washington
## 2 1797-1801 John Adams
## 3 1801-1805 Thomas Jefferson
## 4 1805-1809 Thomas Jefferson
## 5 1809-1812 James Madison
## 6 1812-1813 James Madison
## FIRST LADY VICE PRESIDENT Begin
## 1 Martha Washington John Adams 1789
## 2 Abigail Adams Thomas Jefferson 1797
## 3 Martha Wayles Skelton Jefferson\n (no image) Aaron Burr 1801
## 4 Martha Wayles Skelton Jefferson\n (no image) George Clinton 1805
## 5 Dolley Madison George Clinton 1809
## 6 Dolley Madison office vacant 1812
## End
## 1 1797
## 2 1801
## 3 1805
## 4 1809
## 5 1812
## 6 1813
# Characters 1 through 3 of every president's name.
str_sub(usPresidents$PRESIDENT, start = 1, end = 3)
## [1] "Geo" "Joh" "Tho" "Tho" "Jam" "Jam" "Jam" "Jam" "Jam" "Joh" "And"
## [12] "And" "Mar" "Wil" "Joh" "Jam" "Zac" "Mil" "Fra" "Fra" "Jam" "Abr"
## [23] "Abr" "And" "Uly" "Uly" "Uly" "Rut" "Jam" "Che" "Gro" "Gro" "Ben"
## [34] "Gro" "Wil" "Wil" "Wil" "The" "The" "Wil" "Wil" "Woo" "War" "Cal"
## [45] "Cal" "Her" "Fra" "Fra" "Fra" "Har" "Har" "Dwi" "Joh" "Lyn" "Lyn"
## [56] "Ric" "Ric" "Ger" "Jim" "Ron" "Geo" "Bil" "Geo" "Bar"
# Characters 4 through 8 of every president's name.
str_sub(usPresidents$PRESIDENT, start = 4, end = 8)
## [1] "rge W" "n Ada" "mas J" "mas J" "es Ma" "es Ma" "es Ma" "es Ma"
## [9] "es Mo" "n Qui" "rew J" "rew J" "tin V" "liam " "n Tyl" "es K."
## [17] "hary " "lard " "nklin" "nklin" "es Bu" "aham " "aham " "rew J"
## [25] "sses " "sses " "sses " "herfo" "es A." "ster " "ver C" "ver C"
## [33] "jamin" "ver C" "liam " "liam " "liam " "odore" "odore" "liam "
## [41] "liam " "drow " "ren G" "vin C" "vin C" "bert " "nklin" "nklin"
## [49] "nklin" "ry S." "ry S." "ght D" "n F. " "don B" "don B" "hard "
## [57] "hard " "ald R" "my Ca" "ald R" "rge B" "l Cli" "rge W" "ack O"
# Keep the rows whose Begin year ends in 1: the fourth character of Begin is
# compared as text with "1".
usPresidents[str_sub(usPresidents$Begin, start = 4, end = 4) == "1", ]
## YEAR PRESIDENT
## 3 1801-1805 Thomas Jefferson
## 14 1841 William Henry Harrison
## 15 1841-1845 John Tyler
## 22 1861-1865 Abraham Lincoln
## 29 1881 James A. Garfield
## 30 1881-1885 Chester A. Arthur
## 37 1901 William McKinley
## 38 1901-1905 Theodore Roosevelt
## 43 1921-1923 Warren G. Harding
## 48 1941-1945 Franklin D. Roosevelt
## 53 1961-1963 John F. Kennedy
## 60 1981-1989 Ronald Reagan
## 63 2001-2009 George W. Bush
## FIRST LADY
## 3 Martha Wayles Skelton Jefferson\n (no image)
## 14 Anna Tuthill Symmes Harrison
## 15 Letitia Christian Tyler and Julia Gardiner Tyler (no images)
## 22 Mary Todd Lincoln
## 29 Lucretia Rudolph Garfield
## 30 Ellen Lewis Herndon Arthur
## 37 Ida Saxton McKinley
## 38 Edith Kermit Carow Roosevelt
## 43 Florence Kling Harding
## 48 Eleanor Roosevelt
## 53 Jacqueline Kennedy Onassis
## 60 Nancy Reagan
## 63 Laura Bush
## VICE PRESIDENT Begin End
## 3 Aaron Burr 1801 1805
## 14 John Tyler 1841 1841
## 15 office vacant 1841 1845
## 22 Hannibal Hamlin 1861 1865
## 29 Chester A. Arthur 1881 1881
## 30 office vacant 1881 1885
## 37 Theodore Roosevelt 1901 1901
## 38 office vacant 1901 1905
## 43 Calvin Coolidge 1921 1923
## 48 Henry A. Wallace 1941 1945
## 53 Lyndon B. Johnson 1961 1963
## 60 George Bush 1981 1989
## 63 Richard Cheney 2001 2009
# Same row filter (Begin year ending in 1), restricted to four columns. The
# over-long call is split across lines.
usPresidents[
  str_sub(usPresidents$Begin, start = 4, end = 4) == "1",
  c("YEAR", "PRESIDENT", "Begin", "End")
]
## YEAR PRESIDENT Begin End
## 3 1801-1805 Thomas Jefferson 1801 1805
## 14 1841 William Henry Harrison 1841 1841
## 15 1841-1845 John Tyler 1841 1845
## 22 1861-1865 Abraham Lincoln 1861 1865
## 29 1881 James A. Garfield 1881 1881
## 30 1881-1885 Chester A. Arthur 1881 1885
## 37 1901 William McKinley 1901 1901
## 38 1901-1905 Theodore Roosevelt 1901 1905
## 43 1921-1923 Warren G. Harding 1921 1923
## 48 1941-1945 Franklin D. Roosevelt 1941 1945
## 53 1961-1963 John F. Kennedy 1961 1963
## 60 1981-1989 Ronald Reagan 1981 1989
## 63 2001-2009 George W. Bush 2001 2009
# TRUE for every president whose name contains "John" (case-sensitive).
str_detect(string = usPresidents$PRESIDENT, pattern = "John")
## [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [12] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Case-insensitive search. The pattern is wrapped in
# regex(..., ignore_case = TRUE), which replaces the deprecated ignore.case()
# helper, so no deprecation notice is printed.
str_detect(usPresidents$PRESIDENT, regex("John", ignore_case = TRUE))
## [1] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [12] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Use the case-insensitive match as a row filter. regex(..., ignore_case = TRUE)
# replaces the deprecated ignore.case() helper, so no deprecation notice is
# printed.
usPresidents[str_detect(usPresidents$PRESIDENT, regex("John", ignore_case = TRUE)), ]
## YEAR PRESIDENT
## 2 1797-1801 John Adams
## 10 1825-1829 John Quincy Adams
## 15 1841-1845 John Tyler
## 24 1865-1869 Andrew Johnson
## 53 1961-1963 John F. Kennedy
## 54 1963-1965 Lyndon B. Johnson
## 55 1963-1969 Lyndon B. Johnson
## FIRST LADY
## 2 Abigail Adams
## 10 Louisa Catherine Adams
## 15 Letitia Christian Tyler and Julia Gardiner Tyler (no images)
## 24 Eliza McCardle Johnson
## 53 Jacqueline Kennedy Onassis
## 54 Lady Bird Johnson
## 55 Lady Bird Johnson
## VICE PRESIDENT Begin End
## 2 Thomas Jefferson 1797 1801
## 10 John C. Calhoun 1825 1829
## 15 office vacant 1841 1845
## 24 office vacant 1865 1869
## 53 Lyndon B. Johnson 1961 1963
## 54 office vacant 1963 1965
## 55 Hubert M. Humphrey 1963 1969
# Load the saved workspace file over a URL connection.
# NOTE(review): load() restores whatever objects the remote file contains into
# the current environment, and the file is fetched over plain HTTP; only do
# this with a source you trust.
con <- url("http://www.jaredlander.com/data/warTimes.rdata")
# `finally` closes the connection whether or not load() succeeds, so a failed
# download does not leave the connection open.
tryCatch(
  load(con),
  finally = close(con)
)
head(warTimes, 12)
## [1] "September 1, 1774 ACAEA September 3, 1783"
## [2] "September 1, 1774 ACAEA March 17, 1776"
## [3] "1775ACAEA1783"
## [4] "June 1775 ACAEA October 1776"
## [5] "July 1776 ACAEA March 1777"
## [6] "June 14, 1777 ACAEA October 17, 1777"
## [7] "1777ACAEA1778"
## [8] "1775ACAEA1782"
## [9] "1776ACAEA1794"
## [10] "1778ACAEA1782"
## [11] "1775ACAEA1782"
## [12] "1779ACAEA1782"
# Show the entries that contain a hyphen.
warTimes[str_detect(warTimes, "-")]
## [1] "6 June 1944 ACAEA mid-July 1944" "25 August-17 December 1944"
# Split each entry on "ACAEA" or a hyphen. n = 2 caps the result at two pieces,
# so only the first separator found in an entry is used.
warTimes <- str_split(warTimes, pattern = "(ACAEA)|-", n = 2)
head(x = warTimes)
## [[1]]
## [1] "September 1, 1774 " " September 3, 1783"
##
## [[2]]
## [1] "September 1, 1774 " " March 17, 1776"
##
## [[3]]
## [1] "1775" "1783"
##
## [[4]]
## [1] "June 1775 " " October 1776"
##
## [[5]]
## [1] "July 1776 " " March 1777"
##
## [[6]]
## [1] "June 14, 1777 " " October 17, 1777"
# Take the first piece of every split entry. vapply() with character(1) fixes
# the result as a character vector with one string per entry, whatever the
# input looks like, unlike sapply(), whose return type depends on the input.
start <- vapply(warTimes, function(pieces) pieces[1], character(1))
head(start)
## [1] "September 1, 1774 " "September 1, 1774 " "1775"
## [4] "June 1775 " "July 1776 " "June 14, 1777 "
# Strip leading and trailing white space. The result is only printed; `start`
# itself is left unchanged.
str_trim(start, side = "both")
## [1] "September 1, 1774" "September 1, 1774" "1775"
## [4] "June 1775" "July 1776" "June 14, 1777"
## [7] "1777" "1775" "1776"
## [10] "1778" "1775" "1779"
## [13] "January" "1785" "1798"
## [16] "1801" "August" "June 18, 1812"
## [19] "1812" "1813" "1812"
## [22] "1812" "1813" "1813"
## [25] "1813" "1814" "1813"
## [28] "1814" "1813" "1815"
## [31] "November 22, 1817" "1817" "1819"
## [34] "November 5" "1823" "1825"
## [37] "1827" "May" "February 6"
## [40] "1838" "December 23, 1835" "December 1838"
## [43] "October 19" "March 11, 1845" "April 25, 1846"
## [46] "1846" "1846" "1846"
## [49] "1847" "1847" "1858"
## [52] "1847" "April 28" "1851"
## [55] "July 13, 1854" "August 4, 1855" "October 1855"
## [58] "1855" "1855" "1855"
## [61] "1855" "1856" "6"
## [64] "1859" "March 6, 1860" "1860"
## [67] "April 12, 1861" "1861" "1861"
## [70] "1862" "1862" "1862"
## [73] "August 17" "July 31, 1861" "1863"
## [76] "July 20, 1863" "1865" "1864"
## [79] "1866" "June 1867" "1867"
## [82] "June 17, 1870" "June 1, 1871" "July 6, 1872"
## [85] "February 12, 1874" "June 27, 1874" "1876"
## [88] "1877" "1878" "1878"
## [91] "1879" "1879" "June"
## [94] "March 30" "1887" "November 1890"
## [97] "June 22" "1891" "January 21"
## [100] "1898" "April 25" "1898"
## [103] "1898" "1898" "1898"
## [106] "1898" "1898" "June 2, 1899"
## [109] "1899" "September 28, 1899" "1912"
## [112] "April 21, 1914" "July 28, 1915" "1916"
## [115] "1917" "1917" "1917"
## [118] "1917" "1917" "1917"
## [121] "1918" "1918" "1918"
## [124] "December 7, 1941" "1941" "1941"
## [127] "" "" "June 3, 1942"
## [130] "August 7, 1942" "January 1942" "November, 1943"
## [133] "June" "October 20, 1944" "January"
## [136] "1 May" "16 February" "1941"
## [139] "8" "1942" "17 November 1942"
## [142] "9 July" "1942" "3 September 1943"
## [145] "January 22, 1944" "22 January 1944" "6 June 1944"
## [148] "6 June" "15 August 1944" "25 August"
## [151] "25 August 1944" "16 December 1944" "February 8, 1945"
## [154] "6 April 1945" "1947" "1950"
## [157] "1950" "1953" "1953"
## [160] "1970" "April 28, 1965" "25 October"
## [163] "July 15" "August 24, 1982" "August 19, 1981"
## [166] "March 1986" "April 15, 1986" "1987"
## [169] "January 4, 1989" "20 December 1989" "August 2, 1990"
## [172] "1991" "1992" "1993"
## [175] "19 September 1994" "August 20, 1998" "March 24"
## [178] "7 October 2001" "7 October 2001" "October 7, 2001"
## [181] "15 January 2002" "7 October 2002" "6 February 2007"
## [184] "March 20, 2003" "March 16, 2004" "January 14, 2010"
## [187] "2003" "March 19"
# Pull out the text "January" where it occurs; entries without it give NA.
str_extract(start, "January")
## [1] NA NA NA NA NA NA NA
## [8] NA NA NA NA NA "January" NA
## [15] NA NA NA NA NA NA NA
## [22] NA NA NA NA NA NA NA
## [29] NA NA NA NA NA NA NA
## [36] NA NA NA NA NA NA NA
## [43] NA NA NA NA NA NA NA
## [50] NA NA NA NA NA NA NA
## [57] NA NA NA NA NA NA NA
## [64] NA NA NA NA NA NA NA
## [71] NA NA NA NA NA NA NA
## [78] NA NA NA NA NA NA NA
## [85] NA NA NA NA NA NA NA
## [92] NA NA NA NA NA NA NA
## [99] "January" NA NA NA NA NA NA
## [106] NA NA NA NA NA NA NA
## [113] NA NA NA NA NA NA NA
## [120] NA NA NA NA NA NA NA
## [127] NA NA NA NA "January" NA NA
## [134] NA "January" NA NA NA NA NA
## [141] NA NA NA NA "January" "January" NA
## [148] NA NA NA NA NA NA NA
## [155] NA NA NA NA NA NA NA
## [162] NA NA NA NA NA NA NA
## [169] "January" NA NA NA NA NA NA
## [176] NA NA NA NA NA "January" NA
## [183] NA NA NA "January" NA NA
# Return the full entries that contain "January" rather than just the match.
start[str_detect(start, "January")]
## [1] "January " "January 21" "January 1942 "
## [4] "January " "January 22, 1944 " "22 January 1944 "
## [7] "January 4, 1989" "15 January 2002 " "January 14, 2010 "
# First run of four digits, written as four separate character classes.
head(str_extract(start, "[0-9][0-9][0-9][0-9]"), n = 20)
## [1] "1774" "1774" "1775" "1775" "1776" "1777" "1777" "1775" "1776" "1778"
## [11] "1775" "1779" NA "1785" "1798" "1801" NA "1812" "1812" "1813"
# Same search using a repetition count.
head(str_extract(start, "[0-9]{4}"), n = 20)
## [1] "1774" "1774" "1775" "1775" "1776" "1777" "1777" "1775" "1776" "1778"
## [11] "1775" "1779" NA "1785" "1798" "1801" NA "1812" "1812" "1813"
# Same search using the \d shorthand for a digit.
head(str_extract(start, "\\d{4}"), n = 20)
## [1] "1774" "1774" "1775" "1775" "1776" "1777" "1777" "1775" "1776" "1778"
## [11] "1775" "1779" NA "1785" "1798" "1801" NA "1812" "1812" "1813"
# First run of one to three digits.
head(str_extract(start, "\\d{1,3}"), n = 20)
## [1] "1" "1" "177" "177" "177" "14" "177" "177" "177" "177" "177"
## [12] "177" NA "178" "179" "180" NA "18" "181" "181"
# ^ anchors the match to the beginning of the string: only entries that start
# with four digits match.
head(str_extract(start, "^\\d{4}"), n = 30)
## [1] NA NA "1775" NA NA NA "1777" "1775" "1776" "1778"
## [11] "1775" "1779" NA "1785" "1798" "1801" NA NA "1812" "1813"
## [21] "1812" "1812" "1813" "1813" "1813" "1814" "1813" "1814" "1813" "1815"
# Adding $ also anchors the end: the entry must consist of exactly four digits.
head(str_extract(start, "^\\d{4}$"), n = 30)
## [1] NA NA "1775" NA NA NA "1777" "1775" "1776" "1778"
## [11] "1775" "1779" NA "1785" "1798" "1801" NA NA "1812" "1813"
## [21] "1812" "1812" "1813" "1813" "1813" "1814" "1813" "1814" "1813" "1815"
# Replace only the first digit found in each entry with "x".
head(str_replace(start, "\\d", "x"), n = 30)
## [1] "September x, 1774 " "September x, 1774 " "x775"
## [4] "June x775 " "July x776 " "June x4, 1777 "
## [7] "x777" "x775" "x776"
## [10] "x778" "x775" "x779"
## [13] "January " "x785" "x798"
## [16] "x801" "August " "June x8, 1812 "
## [19] "x812" "x813" "x812"
## [22] "x812" "x813" "x813"
## [25] "x813" "x814" "x813"
## [28] "x814" "x813" "x815"
# Replace every digit in each entry with "x".
head(str_replace_all(start, "\\d", "x"), n = 30)
## [1] "September x, xxxx " "September x, xxxx " "xxxx"
## [4] "June xxxx " "July xxxx " "June xx, xxxx "
## [7] "xxxx" "xxxx" "xxxx"
## [10] "xxxx" "xxxx" "xxxx"
## [13] "January " "xxxx" "xxxx"
## [16] "xxxx" "August " "June xx, xxxx "
## [19] "xxxx" "xxxx" "xxxx"
## [22] "xxxx" "xxxx" "xxxx"
## [25] "xxxx" "xxxx" "xxxx"
## [28] "xxxx" "xxxx" "xxxx"
# Replace every run of one to four digits with a single "x".
head(str_replace_all(start, "\\d{1,4}", "x"), n = 30)
## [1] "September x, x " "September x, x " "x"
## [4] "June x " "July x " "June x, x "
## [7] "x" "x" "x"
## [10] "x" "x" "x"
## [13] "January " "x" "x"
## [16] "x" "August " "June x, x "
## [19] "x" "x" "x"
## [22] "x" "x" "x"
## [25] "x" "x" "x"
## [28] "x" "x" "x"
# Two snippets of HTML: a link and a piece of bold text.
x <- c(
  "<a href = 'index.html'>The link is here</a>",
  "<b>This is bold text</b>"
)
x
## [1] "<a href = 'index.html'>The link is here</a>"
## [2] "<b>This is bold text</b>"
# Keep only the text between the tags: the parenthesised group captures it and
# "\\1" in the replacement refers back to that group.
str_replace(x, pattern = "<.+?>(.+?)<.+>", replacement = "\\1")
## [1] "The link is here" "This is bold text"