This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
Stringr package
Install and load stringr library
library(tidyverse)
package ‘tidyverse’ was built under R version 3.2.5
Error in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]) :
there is no package called ‘lubridate’
Error: package or namespace load failed for ‘tidyverse’
Turning numbers into strings
# Render the numbers in `a` as text, keeping two significant digits
format(a, digits = 2)
[1] "1.2e+05" "2.5e-04"
# Fixed-point notation ("f" is the C printf-style fixed format)
formatC(a, format = "f")
[1] "123456.0000" "0.0002"
# Scientific (exponent) notation for every element
formatC(a, format = "e")
[1] "1.2346e+05" "2.5000e-04"
# Alternatively, "g" chooses between fixed and scientific notation per value
formatC(a, format = "g")
[1] "1.235e+05" "0.00025"
Get to know a dataset
# Attach the babynames package and preview the first rows of its babynames data
library(babynames)
head(babynames)
# Build name_female: keep the name and sex columns, retain only the rows where
# sex is "F", and add a label column (Sex_female) set to "Female"
name_female <- babynames %>%
  dplyr::select(name, sex) %>%
  dplyr::filter(sex == "F") %>%
  dplyr::mutate(Sex_female = "Female")

# Inspect the last rows of the result
tail(name_female)
# Build name_male: same steps as for name_female, but keep the rows whose sex
# is not "F". NOTE: the label column is still called Sex_female here, although
# it holds the value "Male".
name_male <- babynames %>%
  dplyr::select(name, sex) %>%
  dplyr::filter(sex != "F") %>%
  dplyr::mutate(Sex_female = "Male")

# Inspect the first rows of the result
head(name_male)
# Number of characters in each name held in name_male
boy_length <- str_length(string = name_male$name)
head(boy_length)
[1] 4 7 5 7 6 5
# Average number of characters across the names in name_male
mean(boy_length)
[1] 5.993554
# Last letter of each female name: a start and end of -1 both point at the
# final character, counting from the end of the string
babynames_sub <- str_sub(
  string = babynames$name[babynames$sex == "F"],
  start = -1,
  end = -1
)
head(babynames_sub)
[1] "y" "a" "a" "h" "e" "t"
Finding pattern
# Flag every row whose name contains the substring "zz"
pattern_zz <- str_detect(string = babynames$name, pattern = "zz")
head(pattern_zz)
[1] FALSE FALSE FALSE FALSE FALSE FALSE
# Summing the logical vector counts the rows whose name contains "zz"
sum(pattern_zz)
[1] 2313
# Use the logical vector as a row filter and preview the matching records
head(babynames[pattern_zz,])
# Find the baby names containing the letter "u".
# str_subset() returns the matching name strings themselves, so u_name is a
# character vector of names, not a set of row positions.
u_name <- str_subset(babynames$name, fixed("u"))
head(u_name)

# To pull the matching rows out of the data, filter with a logical mask from
# str_detect(). Indexing rows with the character vector u_name would treat the
# names as row names, which the data does not have, so it would not select the
# intended rows.
head(babynames[str_detect(babynames$name, fixed("u")), ])
Replace strings
# Remove the first "&" from each string; the spaces on either side of it are
# left in place because only the "&" itself is matched
a <- c(
  "Ha Van & Tuyen",
  "Ha Van & Tri",
  "Ha Van & Tuong"
)
str_replace(string = a, pattern = "&", replacement = "")
[1] "Ha Van Tuyen" "Ha Van Tri" "Ha Van Tuong"
# str_replace() only changes the first match; when a string contains several
# "%" characters, use str_replace_all() to remove every one of them
b <- "Ha % Van % Tuyen: % A Massey % student"
str_replace_all(string = b, pattern = "%", replacement = "")
[1] "Ha Van Tuyen: A Massey student"
LS0tDQp0aXRsZTogInN0cmluZ3IgcHJhY3RpY2UiDQpBdXRob3I6ICJUdXllbiBIYSBWYW4iDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiANCg0KKipTdHJpbmdyIHBhY2thZ2UqKg0KDQogIEluc3RhbGwgYW5kIGxvYWQgKnN0cmluZ3IqIGxpYnJhcnkNCmBgYHtyfQ0KIyBpbnN0YWxsLnBhY2thZ2VzKCJzdHJpbmdyIikNCg0KbGlicmFyeShzdHJpbmdyKQ0KDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmBgYA0KDQogIFR1cm5pbmcgbnVtYmVycyBpbnRvIHN0cmluZ3MNCiAgDQpgYGB7cn0NCiMgVGhlcmUgYXJlIGZ1bmN0aW9ucyB0aGF0IGFsbG93IHRvIGNvbnZlcnQgbnVtYmVycyB0byBzdHJpbmdzDQoNCmE8LWMoMTIzNDU2LDAuMDAwMjUpDQoNCmZvcm1hdChhLGRpZ2l0cz0yKQ0KDQpgYGANCg0KYGBge3J9DQojIEZpeGVkIGFuZCBzY2llbnRpZmljIGZvcm1hdA0KDQojIEZpeGVkIGZvcm1hdA0KDQpmb3JtYXRDKGEsIGZvcm1hdCA9ICJmIikNCg0KYGBgDQoNCg0KYGBge3J9DQoNCiMgU2NpZW50aWZpYyBmb3JtYXQNCg0KZm9ybWF0QyhhLGZvcm1hdCA9ICJlIikNCg0KIyBvciANCmZvcm1hdEMoYSxmb3JtYXQgPSAiZyIpDQoNCmBgYA0KDQogIEdldCB0byBrbm93IGEgZGF0YXNldCANCg0KYGBge3J9DQpsaWJyYXJ5KGJhYnluYW1lcykNCg0KaGVhZChiYWJ5bmFtZXMpDQoNCmBgYA0KDQpgYGB7cn0NCg0KIyBDcmVhdGUgYSBuZXcgY29sdW1uIG5hbWVkIG5hbWVfZmVtYWxlDQoNCm5hbWVfZmVtYWxlPC1iYWJ5bmFtZXMgJT4lIGRwbHlyOjpzZWxlY3QobmFtZSxzZXgpICU+JSBkcGx5cjo6ZmlsdGVyKHNleD09IkYiKSAlPiUgZHBseXI6Om11dGF0ZShTZXhfZmVtYWxlPSJGZW1hbGUiKQ0KDQp0YWlsKG5hbWVfZmVtYWxlKQ0KYGBgDQpgYGB7cn0NCm5hbWVfbWFsZTwtYmFieW5hbWVzICU+JSBkcGx5cjo6c2VsZWN0KG5hbWUsc2V4KSAlPiUgZHBseXI6OmZpbHRlcihzZXghPSJGIikgJT4lIGRwbHlyOjptdXRhdGUoU2V4X2ZlbWFsZT0iTWFsZSIpDQoNCmhlYWQobmFtZV9tYWxlKQ0KDQpgYGANCg0KYGBge3J9DQpib3lfbGVuZ3RoPC1zdHJfbGVuZ3RoKG5hbWVfbWFsZSRuYW1lKQ0KDQpoZWFkKGJveV9sZW5ndGgpDQoNCiMgY2FsY3VsYXRlIHRoZSBtZWFuIG9mIG5hbWUgbGVuZ3RoDQptZWFuKGJveV9sZW5ndGgpDQpgYGANCmBgYHtyfQ0KDQojIGV4dHJhY3QgYSBzdWJzZXQgb2YgY2hhcmFjdGVycywgZ2V0IHRo
ZSBsYXN0IGxldHRlciBvZiBnaXJsJ3MgbmFtZQ0KDQpiYWJ5bmFtZXNfc3ViPC1zdHJfc3ViKGJhYnluYW1lcyRuYW1lW2JhYnluYW1lcyRzZXg9PSJGIl0sLTEsLTEpDQoNCmhlYWQoYmFieW5hbWVzX3N1YikNCmBgYA0KDQogIEZpbmRpbmcgcGF0dGVybg0KICANCmBgYHtyfQ0KDQojIEZpbmQgYWxsIHBhdHRlcm4gInp6IiBpbiBuYW1lIGNvbHVtbg0KDQpwYXR0ZXJuX3p6PC1zdHJfZGV0ZWN0KGJhYnluYW1lcyRuYW1lLHBhdHRlcm4gPSAienoiKQ0KDQpoZWFkKHBhdHRlcm5fenopDQoNCnN1bShwYXR0ZXJuX3p6KQ0KDQpoZWFkKGJhYnluYW1lc1twYXR0ZXJuX3p6LF0pDQpgYGANCmBgYHtyfQ0KIyBGaW5kIHRoZSBiYWJ5bmFtZXMgY29udGFpbmluZyB1DQoNCnVfbmFtZTwtc3RyX3N1YnNldChiYWJ5bmFtZXMkbmFtZSxmaXhlZCgidSIpKQ0KDQpoZWFkKGJhYnluYW1lc1t1X25hbWUsXSkNCmBgYA0KDQogIFJlcGFsY2Ugc3RyaW5ncyANCg0KYGBge3J9DQphPC1jKCJIYSBWYW4gJiBUdXllbiIsIkhhIFZhbiAmIFRyaSIsIkhhIFZhbiAmIFR1b25nIikNCg0Kc3RyX3JlcGxhY2UoYSwgcGF0dGVybiA9ICImIiwgcmVwbGFjZW1lbnQgPSAiIikNCg0KYGBgDQoNCmBgYHtyfQ0KIyBJZiBoYXZlIG1hbnkgJSBpbiBvbmUgc2VudGVuY2UsIHVzaW5nIHN0cl9yZXBsYWNlX2FsbCBpbnN0ZWFkDQpiPC1jKCJIYSAlIFZhbiAlIFR1eWVuOiAlIEEgTWFzc2V5ICUgc3R1ZGVudCIpDQoNCnN0cl9yZXBsYWNlX2FsbChiLHBhdHRlcm4gPSAiJSIscmVwbGFjZW1lbnQgPSAiIikNCmBgYA0KDQogDQo=