library(tidyverse)
library(lubridate)
# p. 302, exercise 4: Create functions that take a vector as input and return: (a) the last value. Should you use `[` or `[[`?
#' Return the last element of a vector.
#'
#' Atomic vectors are subset with `[`, so the result keeps the input's type;
#' lists are subset with `[[`, so the element itself is returned rather than
#' a length-one list.
#'
#' @param x An atomic vector or a list.
#' @return The last element of `x`. An empty atomic vector yields an empty
#'   vector of the same type; an empty list yields `NULL`.
last_value <- function(x) {
  n <- length(x)
  if (is.atomic(x)) {
    return(x[n])
  }
  if (n == 0L) {
    # `x[[0]]` on an empty list is an error; mirror the atomic branch, which
    # returns "nothing" instead of failing.
    return(NULL)
  }
  x[[n]]
}
last_value(c(1,2,3))
[1] 3
last_value(list(1,2,3))
[1] 3
#' Return the elements found at even positions (2, 4, 6, ...).
#'
#' @param x An atomic vector or a list.
#' @return For atomic input, the even-position elements of `x`. For list
#'   input, the even-position elements flattened with `unlist()`. Inputs
#'   shorter than two elements give an empty result.
even_position <- function(x) {
  # seq(2, length(x), 2) errors when length(x) < 2; this form yields
  # integer(0) for short inputs instead.
  index <- seq_len(length(x) %/% 2L) * 2L
  if (is.atomic(x)) {
    x[index]
  } else {
    # Select list elements first, then flatten, so that positions refer to
    # list elements and not to entries of the flattened vector.
    unlist(x[index])
  }
}
even_position(letters)
[1] "b" "d" "f" "h" "j" "l" "n" "p" "r" "t" "v" "x" "z"
even_position(list(1,2))
[1] 2
#' Return every element except the last one.
#'
#' @param x An atomic vector or a list.
#' @return For atomic input, `x` without its last element. For list input,
#'   the remaining elements flattened with `unlist()`.
except_last_value <- function(x) {
  # Drop the last position before any flattening: flattening first would
  # remove the wrong entry whenever a list element has length != 1.
  kept <- x[-length(x)]
  if (is.atomic(x)) {
    kept
  } else {
    unlist(kept)
  }
}
except_last_value(c(1,2,3))
[1] 1 2
except_last_value(list(1,2,3))
[1] 1 2
#' Return the non-missing elements found at even positions.
#'
#' @param x An atomic vector or a list.
#' @return The elements at positions 2, 4, 6, ... with `NA`s removed. List
#'   input is flattened with `unlist()` after the positions are selected.
#'   Repeated values are kept (the earlier `intersect()`-based version
#'   silently de-duplicated them).
even_noNA <- function(x) {
  # Safe for inputs shorter than two elements, where seq(2, length(x), 2)
  # would raise an error.
  index <- seq_len(length(x) %/% 2L) * 2L
  picked <- if (is.atomic(x)) x[index] else unlist(x[index])
  picked[!is.na(picked)]
}
even_noNA(list(1,2,3,NA,2,3))
[1] 2 3
x <- c(NaN,NA,-1,1)
x[x<=0]
[1] NA NA -1
x[-which(x>0)]
[1] NaN NA -1
# Each example table is printed first and then reshaped.

# table2: turn the values of `type` into columns, filled from `count`.
table2
table2 %>%
  spread(key = type, value = count)

# table3: split `rate` on "/" into `cases` and `population`.
table3
table3 %>%
  separate(rate, into = c("cases", "population"), sep = "/")

# table4a: stack the "1999" and "2000" columns into year/cases pairs.
table4a
table4a %>%
  gather("1999", "2000", key = "year", value = "cases")

# table4b: same reshaping, with the values stored as `population`.
table4b
table4b %>%
  gather("1999", "2000", key = "year", value = "population")

# table5: paste `century` and `year` together (no separator) into `year`.
table5
table5 %>%
  unite(year, century, year, sep = "")
flights <- nycflights13::flights
weather <- nycflights13::weather
airports <- nycflights13::airports
如何将隐含缺失变为明确缺失?
# Quarterly returns; the row for 2016 Q1 is absent (implicitly missing) and
# 2015 Q4 is recorded as NA (explicitly missing).
stocks <- tibble(
  year = rep(c(2015, 2016), times = c(4, 3)),
  qtr = c(1, 2, 3, 4, 2, 3, 4),
  return = c(12, 2, 3, NA, rep(23, 3))
)
stocks
# complete() adds a row for every year/qtr combination, filling gaps with NA.
stocks %>%
  complete(year, qtr)
虚假缺失
# Records where `person` is only written on the first row of each run.
treatment <- tibble(
  person = c("Derrick Whitmore", NA, NA, "Katherine Burke"),
  treatment = c(1, 2, 3, 1),
  response = c(7, 10, 9, 4)
)
treatment
# fill() carries the last non-missing `person` value downwards.
treatment %>%
  fill(person)
exercise 156页 1.Why are gather() and spread() not perfectly symmetrical? Carefully consider the following example
stocks <- tibble( year = c(2015, 2015, 2016, 2016),
half = c( 1, 2, 1, 2),
return = c(1.88, 0.59, 0.92, 0.17) )
stocks
stocks %>%
spread(year, return) %>%
gather("year", "return", `2015`:`2016`)
stocks %>%
spread(year, return)
stocks %>%
spread(year, return) %>%
gather("year", "return", `2015`:`2016`,convert = TRUE)
Both spread() and gather() have a convert argument. What does it do?
2.Why does spreading this tibble fail? How could you add a new column to fix the problem
people <- tribble( ~name, ~key, ~value,
"Phillip Woods", "age", 45,
"Phillip Woods", "height", 186,
"Phillip Woods", "age", 50,
"Jessica Cordero", "age", 37,
"Jessica Cordero", "height", 156 )
people
people$times=c(1,1,2,1,1)
spread(people,key,value)
preg <- tribble( ~pregnant, ~male, ~female, "yes", NA, 10, "no", 20, 12 )
preg
preg <- gather(preg,male,female,key = sex,value = count)
preg
spread(preg,pregnant,count)
1.everything函数的作用?
2.偏移函数
x <- 1:10
lead(x)
[1] 2 3 4 5 6 7 8 9 10 NA
lag(x)
[1] NA 1 2 3 4 5 6 7 8 9
3.滚动聚合
cumprod(x)
[1] 1 2 6 24 120 720 5040 40320 362880 3628800
4.排名函数
y <- c(2,1,NA,-1,8,-1)
min_rank(y)
[1] 4 3 NA 1 5 1
dense_rank(y)
[1] 3 2 NA 1 4 1
row_number(y)
[1] 4 3 NA 1 5 2
percent_rank(y)
[1] 0.75 0.50 NA 0.00 1.00 0.00
cume_dist(y)
[1] 0.8 0.6 NA 0.4 1.0 0.4
5.汇总函数
x <- seq(1,11)
x
[1] 1 2 3 4 5 6 7 8 9 10 11
四分位间距
IQR(x)
[1] 5
quantile(x,0.25)
25%
3.5
quantile(x,0.75)
75%
8.5
绝对中位差
mad(x)
[1] 4.4478
定位度量
nth(x,5)
[1] 5
last(x)
[1] 11
计数 样本个数
flighs <- nycflights13::flights
flighs
flights共有多少个样本?
flighs%>%
summarise(n=n())
exercise 1. Find all flights that: a. Had an arrival delay of two or more hours
flighs%>%filter(arr_delay>=120)
Flew to Houston (IAH or HOU)
Were operated by United, American, or Delta
flights%>%
filter(carrier %in% c("AA","UA","DL"))
flights%>%
filter(month %in% c(6,7,8))
flights%>%
filter(dep_delay<=0 & arr_delay>=120)
flights%>%
filter(arr_delay>=60 &(dep_delay-arr_delay)==30)
# p. 58, exercise 1: Currently dep_time and sched_dep_time are convenient to look at, but hard to compute with because they're not really continuous numbers. Convert them to a more convenient representation of number of minutes since midnight.
73页第六题 6. For each plane, count the number of flights before the first delay of greater than 1 hour.
# Per plane (tailnum): number of flights flown before the first arrival delay
# of more than 60 minutes. Flights without an arrival delay are ignored.
flights %>%
  group_by(tailnum) %>%
  filter(!is.na(arr_delay)) %>%
  arrange(year, month, day, dep_time) %>%
  # cumall() stays TRUE until the first flight that breaks the condition, so
  # summing it counts the leading run of flights delayed by at most an hour.
  summarise(n = sum(cumall(!(arr_delay > 60)))) %>%
  arrange(desc(n))
75页第七题 7. Find all destinations that are flown by at least two carriers. Use that information to rank the carriers
# Destinations served by at least two distinct carriers, most carriers first.
flights %>%
  filter(!is.na(carrier)) %>%
  group_by(dest) %>%
  summarise(n = n_distinct(carrier)) %>%
  # Keep only destinations flown by two or more carriers; without this step
  # single-carrier destinations were still listed.
  filter(n >= 2) %>%
  arrange(desc(n))
# Two small tables sharing keys 1 and 2; key 3 is only in x, key 4 only in y.
x <- tibble(key = c(1, 2, 3), val_x = c("x1", "x2", "x3"))
y <- tibble(key = c(1, 2, 4), val_y = c("y1", "y2", "y3"))
x
y
# Keep only keys present in both tables.
inner_join(x, y, by = "key")
# Keep every row of x.
left_join(x, y, by = "key")
# Keep every row of y.
right_join(x, y, by = "key")
# Keep every row of both tables.
full_join(x, y, by = "key")
x <- tribble( ~key, ~val_x, 1, "x1", 2, "x2", 2, "x3", 3, "x4" )
y <- tribble( ~key, ~val_y, 1, "y1", 2, "y2", 2, "y3", 3, "y4" )
x
y
x%>%
left_join(y,"key")
flights%>%
left_join(weather)
flights%>%
left_join(airports,by=c("dest"="faa"))
semi_join 作用 anti_join 作用
# Two tables that share exactly one row, (x = 1, y = 1).
df1 <- tibble(x = c(1, 2), y = c(1, 1))
df2 <- tibble(x = c(1, 1), y = c(1, 2))
df1
df2
# Rows found in both tables.
df1 %>% intersect(df2)
# Distinct rows found in either table.
df1 %>% union(df2)
# Rows of df1 that are not in df2.
df1 %>% setdiff(df2)
187页第二题 2. Add the location of the origin and destination (i.e., the lat and lon) to flights
airports1 <- airports%>%
select(faa,lat,lon)
flighs%>%
left_join(airports1,by=c("origin"="faa"))%>%
left_join(airports1,by=c("dest"="faa"),suffix=c(".origin",".dest"))
191页第2题 2. Filter flights to only show flights with planes that have flown at least 100 flights
# Tail numbers of planes that have flown at least 100 flights.
flights_100 <- flights %>%
  # Rows with a missing tailnum do not identify a plane. count() would pool
  # them into a single NA group, and the join below matches NA to NA, so that
  # pool could be reported as one "plane" if it reaches 100 rows.
  filter(!is.na(tailnum)) %>%
  count(tailnum) %>%
  filter(n >= 100) %>%
  select(tailnum)
# Keep only the flights flown by those planes.
flights %>%
  semi_join(flights_100, by = "tailnum")
str <- c("a","R for data science",NA)
str
[1] "a" "R for data science" NA
str_length(str)
[1] 1 18 NA
str <- c("abc",NA)
str
[1] "abc" NA
str_c("|-",str_replace_na(str),"-|",collapse = "-")
[1] "|-abc-|-|-NA-|"
x <- c("Apple","Bannana","Pear")
x
[1] "Apple" "Bannana" "Pear"
str_sub(x,-3,-1)
[1] "ple" "ana" "ear"
x <- "1888 is the longest year in Roman numerals: MDCCCLXXXVIII"
x
[1] "1888 is the longest year in Roman numerals: MDCCCLXXXVIII"
str_view(x,"CC?")
str_view(x,"CC+")
str_view(x,"C[XL]+")
如何匹配美国电话号码 xxx-xxx- xxxx 或(xxx)xxx- xxxx 或xxx-xxxx?
x <- c("123-456-1234","(123)456-1234","123-1234","133423")
x
[1] "123-456-1234" "(123)456-1234" "123-1234" "133423"
str_view(x,"((\\d{3}\\-)|\\(\\d{3}\\)|)\\d{3}\\-\\d{4}")
如何改为懒惰匹配?
x <- "1888 is the longest year in Roman numerals: MDCCCLXXXVIII"
x
[1] "1888 is the longest year in Roman numerals: MDCCCLXXXVIII"
str_view(x,"C{2,3}?")
str_view(x,"C[XL]+?")
数据来自fruit
str_view(fruit,"(..)\\1",match = TRUE)
exercise 1.匹配words中以3个辅音字母开头的单词(str_view中使用match=TRUE)
str_view(words,"^[^aeiou]{3}",match = TRUE)
2.匹配words中以相同字母开头结尾的单词(str_view中使用match=TRUE)
str_view(words,"(^.$)|(^(.).*\\3$)",match = TRUE)
x <- "asdas\"'\\"
writeLines(x)
asdas"'\
str_view(x,"\"'\\\\")
4.给定 stringr::words 中的常用单词语料库,创建正则表达式以找出满足下列条件的所有单词。 a. 以 y 开头的单词。
# Words that start with "y".
str_view(words, "^y", match = TRUE)
# Words that end with "x". The end anchor must come after the letter: the
# previous pattern "$x" asks for an "x" after the end of the string and can
# never match.
str_view(words, "x$", match = TRUE)
# Words that are exactly three characters long.
str_view(words, "^...$", match = TRUE)
# Words with seven or more characters.
str_view(words, ".{7,}", match = TRUE)
5.(1) 创建正则表达式来找出符合以下条件的所有单词。 b. 只包含辅音字母的单词(提示:考虑一下匹配“非”元音字母)。
str_view(words,"^[^aeoiu]*$",match = TRUE)
str_view(words,"[^e](ed)$",match = TRUE)
str_view(words,"(ing|ize)$",match = TRUE)
str_view(words,"[aeiou]{3,}",match=TRUE)
str_view(words,"([aeiou][^aeoiu]){2,}",match = TRUE)
g.包含一对重复字母的单词
# `(..)` captures a two-letter pair and `\\1` requires that same pair to occur
# again later in the word. The previous `(.{1,})` also accepted a single
# repeated letter, which is not a repeated pair.
str_view(words, "(..).*\\1", match = TRUE)
h.包含一个至少重复 3 次的字母的单词
str_view(words,"(.)(.*\\1){2,}",match = TRUE)
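# What proportion of the words in `words` end with a vowel? (mean() of the logical vector gives the proportion; use sum() to get the count.)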
mean(str_detect(words,"[aeoiu]$"))
[1] 0.2765306
如何提取出words中符合以元音字母结尾的单词
str_subset(words,"[aeiou]$")
[1] "a" "able" "absolute" "achieve" "active" "advertise" "age" "ago"
[9] "agree" "also" "america" "appropriate" "area" "argue" "arrange" "associate"
[17] "assume" "available" "aware" "balance" "base" "be" "because" "become"
[25] "before" "believe" "bloke" "blue" "bottle" "cake" "care" "case"
[33] "cause" "centre" "chance" "change" "charge" "choice" "choose" "close"
[41] "clothe" "coffee" "colleague" "college" "come" "committee" "compare" "complete"
[49] "compute" "continue" "converse" "couple" "course" "create" "date" "debate"
[57] "decide" "definite" "degree" "describe" "die" "difference" "divide" "do"
[65] "double" "drive" "due" "educate" "else" "encourage" "engine" "europe"
[73] "evidence" "example" "excuse" "exercise" "expense" "experience" "extra" "eye"
[81] "face" "figure" "file" "finance" "fine" "fire" "five" "force"
[89] "fortune" "france" "free" "future" "game" "give" "go" "goodbye"
[97] "hate" "have" "he" "here" "home" "hope" "horse" "house"
[105] "hullo" "idea" "imagine" "improve" "include" "income" "increase" "inside"
[113] "insure" "into" "introduce" "involve" "issue" "judge" "language" "large"
[121] "late" "leave" "lie" "life" "like" "line" "little" "live"
[129] "lose" "love" "machine" "make" "manage" "maybe" "measure" "middle"
[137] "mile" "minute" "more" "move" "name" "nature" "nice" "nine"
[145] "no" "none" "note" "notice" "office" "once" "one" "operate"
[153] "oppose" "organize" "otherwise" "page" "pence" "people" "picture" "piece"
[161] "place" "please" "police" "positive" "possible" "practise" "prepare" "pressure"
[169] "presume" "price" "private" "probable" "produce" "programme" "propose" "provide"
[177] "purpose" "quite" "radio" "raise" "range" "rate" "realise" "receive"
[185] "recognize" "reduce" "require" "resource" "responsible" "rise" "role" "rule"
[193] "safe" "sale" "same" "save" "scheme" "science" "score" "secure"
[201] "see" "sense" "separate" "serve" "service" "settle" "share" "she"
[209] "shoe" "side" "simple" "since" "single" "site" "situate" "size"
[217] "smoke" "so" "some" "space" "square" "stage" "state" "strike"
[225] "structure" "suppose" "sure" "surprise" "table" "take" "tape" "tea"
[233] "telephone" "terrible" "the" "there" "therefore" "thou" "three" "tie"
[241] "time" "to" "too" "trade" "tree" "trouble" "true" "twelve"
[249] "two" "type" "unite" "use" "value" "video" "village" "vote"
[257] "wage" "waste" "we" "wee" "welcome" "where" "while" "white"
[265] "who" "whole" "wide" "wife" "worse" "write" "you"
如何计算words中每个单词含有a字母的个数
str_count(words,"a")
[1] 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 2 1 1 1 2 2 1 1 1 1 1 2 2 2 1 1 2 2 2 1 1 1 2 1 1 1 2 1
[59] 1 1 1 3 2 2 1 1 1 1 1 2 1 1 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0
[117] 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 2 1 1 1 2 1 1 0 0 0 0 0 1 0 0 1 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[175] 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0
[233] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0
[291] 0 0 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0
[349] 0 0 1 1 1 1 1 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0
[407] 0 0 0 0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 2 1 1 1 1 1 1 1 1
[465] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0
[523] 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1
[581] 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 1 1 3 1 1 1 1 2 1 1 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1
[639] 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 1 0
[697] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 1 2 1 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 1 1 0 0 0 0 0
[755] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 1 1 1 2 1 1 1 1 0 0 0 0 0 1 1
[813] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[871] 1 0 0 0 1 0 0 0 0 0 1 0 1 0 1 1 1 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 1 1 1 1 1 1 1
[929] 1 1 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0
# How to find the positions where the letter "a" occurs in each word of `words`
x <- "apple"
head(str_locate_all(words,"a"))
[[1]]
start end
[1,] 1 1
[[2]]
start end
[1,] 1 1
[[3]]
start end
[1,] 1 1
[[4]]
start end
[1,] 1 1
[[5]]
start end
[1,] 1 1
[[6]]
start end
[1,] 1 1
# How to extract the color words from `sentences`
colors <- c("red","yellow","orange","green","blue","purple")
colors_a <- str_c(" ",colors," ",collapse = "|")
colors_a
[1] " red | yellow | orange | green | blue | purple "
has_color <- str_subset(sentences,colors_a)
colors_b <- str_c(colors,collapse = "|")
colors_b
[1] "red|yellow|orange|green|blue|purple"
str_extract_all(has_color,colors_b,simplify = TRUE)
[,1] [,2]
[1,] "blue" ""
[2,] "blue" ""
[3,] "blue" ""
[4,] "yellow" ""
[5,] "green" ""
[6,] "red" ""
[7,] "blue" ""
[8,] "blue" "red"
[9,] "green" ""
[10,] "red" ""
[11,] "red" ""
[12,] "red" ""
[13,] "green" ""
[14,] "purple" ""
[15,] "green" ""
[16,] "red" ""
[17,] "blue" ""
[18,] "red" ""
[19,] "green" ""
[20,] "green" "red"
[21,] "yellow" ""
[22,] "orange" "red"
[23,] "red" ""
[24,] "red" ""
如何提取出sentence中所有的冠词组合
noun <- "(a|an|the) ([^ ]+)"
has_noun <- str_subset(sentences,noun)
str_match(has_noun,noun)
[,1] [,2] [,3]
[1,] "the smooth" "the" "smooth"
[2,] "the sheet" "the" "sheet"
[3,] "the depth" "the" "depth"
[4,] "a chicken" "a" "chicken"
[5,] "the parked" "the" "parked"
[6,] "the sun" "the" "sun"
[7,] "the huge" "the" "huge"
[8,] "the ball" "the" "ball"
[9,] "the woman" "the" "woman"
[10,] "a helps" "a" "helps"
[11,] "the man's" "the" "man's"
[12,] "the sea." "the" "sea."
[13,] "the booth" "the" "booth"
[14,] "a hole" "a" "hole"
[15,] "the bent" "the" "bent"
[16,] "the pants" "the" "pants"
[17,] "an dive" "an" "dive"
[18,] "the view" "the" "view"
[19,] "the tank." "the" "tank."
[20,] "the tall" "the" "tall"
[21,] "the same" "the" "same"
[22,] "the load" "the" "load"
[23,] "the winding" "the" "winding"
[24,] "the size" "the" "size"
[25,] "the grease" "the" "grease"
[26,] "the coat" "the" "coat"
[27,] "the bell" "the" "bell"
[28,] "the state" "the" "state"
[29,] "the sharp" "the" "sharp"
[30,] "the third" "the" "third"
[31,] "the hot" "the" "hot"
[32,] "the cool" "the" "cool"
[33,] "the square" "the" "square"
[34,] "the seven" "the" "seven"
[35,] "the fence" "the" "fence"
[36,] "the drug" "the" "drug"
[37,] "the coat." "the" "coat."
[38,] "the mouse." "the" "mouse."
[39,] "a flop" "a" "flop"
[40,] "a tool" "a" "tool"
[41,] "the soldiers" "the" "soldiers"
[42,] "a rosebush" "a" "rosebush"
[43,] "the raging" "the" "raging"
[44,] "the slide" "the" "slide"
[45,] "a pencil" "a" "pencil"
[46,] "an half" "an" "half"
[47,] "the third" "the" "third"
[48,] "the field." "the" "field."
[49,] "a hit," "a" "hit,"
[50,] "a grand" "a" "grand"
[51,] "the edge" "the" "edge"
[52,] "the cue" "the" "cue"
[53,] "the point" "the" "point"
[54,] "the sand." "the" "sand."
[55,] "the finished" "the" "finished"
[56,] "a fight." "a" "fight."
[57,] "an out" "an" "out"
[58,] "a nice" "a" "nice"
[59,] "the car" "the" "car"
[60,] "a thin" "a" "thin"
[61,] "the big" "the" "big"
[62,] "the cat" "the" "cat"
[63,] "an two" "an" "two"
[64,] "the fence" "the" "fence"
[65,] "the pie" "the" "pie"
[66,] "the barn" "the" "barn"
[67,] "a limb." "a" "limb."
[68,] "the street." "the" "street."
[69,] "the blue" "the" "blue"
[70,] "an eggs." "an" "eggs."
[71,] "the clear" "the" "clear"
[72,] "the boat," "the" "boat,"
[73,] "the other." "the" "other."
[74,] "an to" "an" "to"
[75,] "the crate" "the" "crate"
[76,] "the sum" "the" "sum"
[77,] "the high" "the" "high"
[78,] "the vat" "the" "vat"
[79,] "the pine" "the" "pine"
[80,] "the fall." "the" "fall."
[81,] "the wind" "the" "wind"
[82,] "the log" "the" "log"
[83,] "the logs" "the" "logs"
[84,] "the carpet" "the" "carpet"
[85,] "a weed" "a" "weed"
[86,] "the simplest" "the" "simplest"
[87,] "the less" "the" "less"
[88,] "an the" "an" "the"
[89,] "an cleanse" "an" "cleanse"
[90,] "a rusty" "a" "rusty"
[91,] "the farther" "the" "farther"
[92,] "the heat" "the" "heat"
[93,] "a swim." "a" "swim."
[94,] "the same" "the" "same"
[95,] "the bright" "the" "bright"
[96,] "an ten." "an" "ten."
[97,] "the eyes" "the" "eyes"
[98,] "the wise" "the" "wise"
[99,] "a fond" "a" "fond"
[100,] "a bad" "a" "bad"
[101,] "a strong" "a" "strong"
[102,] "the rusty" "the" "rusty"
[103,] "the results" "the" "results"
[104,] "the right" "the" "right"
[105,] "a high" "a" "high"
[106,] "a graceful" "a" "graceful"
[107,] "the blotter" "the" "blotter"
[108,] "the front" "the" "front"
[109,] "a hole" "a" "hole"
[110,] "the tin" "the" "tin"
[111,] "an can" "an" "can"
[112,] "a new" "a" "new"
[113,] "the couch." "the" "couch."
[114,] "the crew" "the" "crew"
[115,] "the coin" "the" "coin"
[116,] "the torn" "the" "torn"
[117,] "a cushion" "a" "cushion"
[118,] "the back" "the" "back"
[119,] "the air" "the" "air"
[120,] "the two" "the" "two"
[121,] "the prize." "the" "prize."
[122,] "a dull" "a" "dull"
[123,] "the skill" "the" "skill"
[124,] "the hot" "the" "hot"
[125,] "the broken" "the" "broken"
[126,] "the small" "the" "small"
[127,] "a sound" "a" "sound"
[128,] "the stone" "the" "stone"
[129,] "the best" "the" "best"
[130,] "the ship" "the" "ship"
[131,] "the store's" "the" "store's"
[132,] "the players." "the" "players."
[133,] "the column" "the" "column"
[134,] "a good" "a" "good"
[135,] "the flood" "the" "flood"
[136,] "a head" "a" "head"
[137,] "a fig" "a" "fig"
[138,] "an be" "an" "be"
[139,] "the noise" "the" "noise"
[140,] "a buyer" "a" "buyer"
[141,] "the ice" "the" "ice"
[142,] "the next" "the" "next"
[143,] "the third" "the" "third"
[144,] "the oat" "the" "oat"
[145,] "the attic." "the" "attic."
[146,] "the soap" "the" "soap"
[147,] "a tall" "a" "tall"
[148,] "the rink" "the" "rink"
[149,] "the dance" "the" "dance"
[150,] "the new" "the" "new"
[151,] "the inn." "the" "inn."
[152,] "the worst" "the" "worst"
[153,] "the second" "the" "second"
[154,] "the wall." "the" "wall."
[155,] "a wooden" "a" "wooden"
[156,] "an brass." "an" "brass."
[157,] "the bank" "the" "bank"
[158,] "a young" "a" "young"
[159,] "a steady" "a" "steady"
[160,] "the axe" "the" "axe"
[161,] "a loud" "a" "loud"
[162,] "the rubbish" "the" "rubbish"
[163,] "the gold" "the" "gold"
[164,] "the court" "the" "court"
[165,] "the raft" "the" "raft"
[166,] "an X" "an" "X"
[167,] "the rope" "the" "rope"
[168,] "the best" "the" "best"
[169,] "the islands" "the" "islands"
[170,] "an as" "an" "as"
[171,] "the world" "the" "world"
[172,] "the egg." "the" "egg."
[173,] "a bright" "a" "bright"
[174,] "the fence" "the" "fence"
[175,] "the curtain" "the" "curtain"
[176,] "the middle" "the" "middle"
[177,] "the same" "the" "same"
[178,] "a compass." "a" "compass."
[179,] "an others." "an" "others."
[180,] "the front" "the" "front"
[181,] "the pail" "the" "pail"
[182,] "the big" "the" "big"
[183,] "the empty" "the" "empty"
[184,] "a served" "a" "served"
[185,] "the hot-cross" "the" "hot-cross"
[186,] "the field" "the" "field"
[187,] "the twin" "the" "twin"
[188,] "the cord" "the" "cord"
[189,] "the smuggled" "the" "smuggled"
[190,] "the corner" "the" "corner"
[191,] "the bond" "the" "bond"
[192,] "the ruins." "the" "ruins."
[193,] "a mouldy" "a" "mouldy"
[194,] "a pine" "a" "pine"
[195,] "the cloth" "the" "cloth"
[196,] "the swan" "the" "swan"
[197,] "the screen." "the" "screen."
[198,] "the end" "the" "end"
[199,] "the clay" "the" "clay"
[200,] "a smooth" "a" "smooth"
[201,] "the crackers." "the" "crackers."
[202,] "the high" "the" "high"
[203,] "a pierced" "a" "pierced"
[204,] "an was" "an" "was"
[205,] "the log" "the" "log"
[206,] "the stalk" "the" "stalk"
[207,] "an but" "an" "but"
[208,] "a brew" "a" "brew"
[209,] "the box" "the" "box"
[210,] "the window." "the" "window."
[211,] "the workmen's" "the" "workmen's"
[212,] "the breeze." "the" "breeze."
[213,] "a swan," "a" "swan,"
[214,] "the tire" "the" "tire"
[215,] "the old" "the" "old"
[216,] "the last" "the" "last"
[217,] "a person" "a" "person"
[218,] "the sleeping" "the" "sleeping"
[219,] "the fire" "the" "fire"
[220,] "the round" "the" "round"
[221,] "an and" "an" "and"
[222,] "the ink" "the" "ink"
[223,] "a big" "a" "big"
[224,] "the records" "the" "records"
[225,] "the snow" "the" "snow"
[226,] "the sun's" "the" "sun's"
[227,] "the wall." "the" "wall."
[228,] "the and" "the" "and"
[229,] "a is" "a" "is"
[230,] "the dog" "the" "dog"
[231,] "a book." "a" "book."
[232,] "the fine" "the" "fine"
[233,] "an half" "an" "half"
[234,] "the central" "the" "central"
[235,] "the rough" "the" "rough"
[236,] "the new" "the" "new"
[237,] "the lathe" "the" "lathe"
[238,] "the pole" "the" "pole"
[239,] "a dish" "a" "dish"
[240,] "an gathered" "an" "gathered"
[241,] "the day" "the" "day"
[242,] "the strip" "the" "strip"
[243,] "the stew" "the" "stew"
[244,] "an went" "an" "went"
[245,] "the road." "the" "road."
[246,] "the clear" "the" "clear"
[247,] "a king's" "a" "king's"
[248,] "a good" "a" "good"
[249,] "the white" "the" "white"
[250,] "the stream." "the" "stream."
[251,] "the lead" "the" "lead"
[252,] "the lock." "the" "lock."
[253,] "the council" "the" "council"
[254,] "the red" "the" "red"
[255,] "the ledge." "the" "ledge."
[256,] "the sheep" "the" "sheep"
[257,] "a lot" "a" "lot"
[258,] "the spot" "the" "spot"
[259,] "a fair" "a" "fair"
[260,] "an wash" "an" "wash"
[261,] "the game" "the" "game"
[262,] "a long" "a" "long"
[263,] "the strong" "the" "strong"
[264,] "the rain" "the" "rain"
[265,] "the hot" "the" "hot"
[266,] "the story" "the" "story"
[267,] "the eastern" "the" "eastern"
[268,] "the line" "the" "line"
[269,] "the cuts" "the" "cuts"
[270,] "a dense" "a" "dense"
[271,] "the knee" "the" "knee"
[272,] "the shaky" "the" "shaky"
[273,] "the odor." "the" "odor."
[274,] "an some" "an" "some"
[275,] "the sail" "the" "sail"
[276,] "the dart" "the" "dart"
[277,] "a new" "a" "new"
[278,] "the base." "the" "base."
[279,] "a dog." "a" "dog."
[280,] "a dime," "a" "dime,"
[281,] "an that" "an" "that"
[282,] "the shore." "the" "shore."
[283,] "the wall" "the" "wall"
[284,] "the oath" "the" "oath"
[285,] "the sill" "the" "sill"
[286,] "the steel" "the" "steel"
[287,] "a lag" "a" "lag"
[288,] "the spring" "the" "spring"
[289,] "the chart" "the" "chart"
[290,] "the crowd" "the" "crowd"
[291,] "a and" "a" "and"
[292,] "the wide" "the" "wide"
[293,] "the dust" "the" "dust"
[294,] "the porch" "the" "porch"
[295,] "the finish." "the" "finish."
[296,] "the deep" "the" "deep"
[297,] "the dice" "the" "dice"
[298,] "the frail" "the" "frail"
[299,] "the valve" "the" "valve"
[300,] "the shoe" "the" "shoe"
[301,] "the sausage" "the" "sausage"
[302,] "the rose" "the" "rose"
[303,] "the colt." "the" "colt."
[304,] "a hot" "a" "hot"
[305,] "an wore" "an" "wore"
[306,] "the bike" "the" "bike"
[307,] "the ashes" "the" "ashes"
[308,] "the used" "the" "used"
[309,] "an neck" "an" "neck"
[310,] "the tall" "the" "tall"
[311,] "a thin" "a" "thin"
[312,] "the lantern" "the" "lantern"
[313,] "the soft" "the" "soft"
[314,] "the tenth" "the" "tenth"
[315,] "an to" "an" "to"
[316,] "a good" "a" "good"
[317,] "a vest" "a" "vest"
[318,] "the figs," "the" "figs,"
[319,] "the door" "the" "door"
[320,] "the fire" "the" "fire"
[321,] "the print." "the" "print."
[322,] "the limits" "the" "limits"
[323,] "the high" "the" "high"
[324,] "an end" "an" "end"
[325,] "the old" "the" "old"
[326,] "the dark" "the" "dark"
[327,] "a shaggy" "a" "shaggy"
[328,] "the corner" "the" "corner"
[329,] "the hatch" "the" "hatch"
[330,] "the walnut" "the" "walnut"
[331,] "the form" "the" "form"
[332,] "the stuff" "the" "stuff"
[333,] "the most" "the" "most"
[ reached getOption("max.print") -- omitted 162 rows ]
如果sentences是tibble,如何操作?
# Split each sentence's first "article + following word" match into two new
# columns, keeping the original sentence.
tibble(sentence = sentences) %>%
  tidyr::extract(
    sentence,
    # Column name corrected from the misspelt "nuon".
    c("article", "noun"),
    # `\\b` stops the article from matching the tail of a longer word, which
    # is what produces hits such as "an dive" without it.
    "\\b(a|an|the) ([^ ]+)",
    remove = FALSE
  )
如何使用boundary切分单词
x <- "This is a sentence. This is another sentence."
x
[1] "This is a sentence. This is another sentence."
str_split(x,boundary("word"))
[[1]]
[1] "This" "is" "a" "sentence" "This" "is" "another" "sentence"
str_split(x," ",simplify = TRUE)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] "This" "is" "a" "sentence." "This" "is" "another" "sentence."
另外参数n可以设定切分到片段个数
x <- 20190112
x
[1] 20190112
ymd(x)
[1] "2019-01-12"
x <- "January 31 2017"
x
[1] "January 31 2017"
mdy(x)
[1] "2017-01-31"
x <- "2017-01-31 20:11:59"
x
[1] "2017-01-31 20:11:59"
ymd_hms(x)
[1] "2017-01-31 20:11:59 UTC"
# Build a date-time from the separate components stored in flights.
# make_datetime() takes its arguments in the order year, month, day, hour,
# min; passing `minute` fourth used the minutes as the hour.
flights %>%
  transmute(date = make_datetime(year, month, day, hour, minute))
as_datetime(today())
[1] "2019-01-15 UTC"
as_date(now())
[1] "2019-01-15"
Unix时间戳
as_datetime(60*60*24)
[1] "1970-01-02 UTC"
as_date(365*10+1)
[1] "1979-12-31"
d4 <- c("August 19 (2015)", "July 1 (2015)")
d4
[1] "August 19 (2015)" "July 1 (2015)"
mdy(d4)
[1] "2015-08-19" "2015-07-01"
# NOTE(review): two lines of locale-warning console output stood here in
# place of the statement that defined `d5`, leaving it undefined. The value
# below is taken from the printed result of `d5` that follows.
d5 <- "12/30/14"
d5
[1] "12/30/14"
mdy(d5)
[1] "2014-12-30"
datetime <- ymd_hms("2016-07-08 12:34:56")
datetime
[1] "2016-07-08 12:34:56 UTC"
某月的第几天
mday(datetime)
[1] 8
某年的第几天
yday(datetime)
[1] 190
一周的第几天
wday(datetime)
[1] 6
# Round each departure down to the start of its week.
# `dep_time` is a plain number, not a date-time string, so ymd_hms() could not
# parse it and the earlier version printed six NAs. The date-time is rebuilt
# from the year/month/day columns instead.
# NOTE(review): assumes dep_time encodes clock time as HHMM (hours =
# dep_time %/% 100, minutes = dep_time %% 100) -- confirm against the
# nycflights13 documentation.
head(floor_date(
  with(flights, make_datetime(year, month, day, dep_time %/% 100, dep_time %% 100)),
  "week"
))
全部变成一周的第一天的日期
h_age <- today()-ymd(19790112)
h_age
Time difference of 14613 days
as.duration(h_age)
[1] "1262563200s (~40.01 years)"
把年转换成秒
dyears(2)
[1] "63072000s (~2 years)"
夏令时到冬令时可用
days(2)
[1] "2d 0H 0M 0S"
today()+days(2)
[1] "2019-01-17"
如何查询系统时区
Sys.timezone()
[1] "Asia/Taipei"
# with_tz(): change the time zone used for display; the underlying instant in time is unchanged
with_tz(today())
[1] "2019-01-15"
# force_tz(): change the time zone while keeping the clock time, so the underlying instant in time changes
force_tz(today())
[1] "2019-01-15"
ggplot(diamonds)+
geom_bar(aes(cut,fill=clarity),position = "fill")
mu <- list(5,10,-3)
sigma <- list(1,5,10)
map2(mu,sigma,rnorm,n=10)
[[1]]
[1] 4.947986 5.467590 5.913339 4.274208 4.581593 4.788442 5.614764 4.234672 3.534121 6.198214
[[2]]
[1] 5.725682 8.920260 14.269808 7.049484 13.500637 16.859337 10.962061 6.890432 10.254318 16.126379
[[3]]
[1] -8.693185 -19.604794 -12.804712 6.664651 -4.711103 -3.668457 -7.773991 3.056113 12.705577 20.735267
三种不同索引方式 公式
models <- mtcars%>%
split(.$cyl)%>%
map(~lm(mpg~wt,data=.))
models
$`4`
Call:
lm(formula = mpg ~ wt, data = .)
Coefficients:
(Intercept) wt
39.571 -5.647
$`6`
Call:
lm(formula = mpg ~ wt, data = .)
Coefficients:
(Intercept) wt
28.41 -2.78
$`8`
Call:
lm(formula = mpg ~ wt, data = .)
Coefficients:
(Intercept) wt
23.868 -2.192
字符向量
models%>%
map(summary)%>%
map("r.squared")
$`4`
[1] 0.5086326
$`6`
[1] 0.4645102
$`8`
[1] 0.4229655
数字向量
x <- list(list(1,2,3),list(4,5,6),list(7,8,9))
x%>%
map(2)
[[1]]
[1] 2
[[2]]
[1] 5
[[3]]
[1] 8
如何生成方差不同均值相同的正态分布随机数
mu <- list(5,10,3)
mu%>%
map(rnorm,n=10,mean=1)
[[1]]
[1] 6.822252 -4.427688 4.998521 3.544448 2.806287 -7.899798 5.790980 0.258452 5.080364 1.120691
[[2]]
[1] 3.03120660 0.09106127 -12.44283242 -13.53326395 11.12894609 -13.93980763 -6.59596567 11.50021244 21.48651232
[10] 0.31815504
[[3]]
[1] 1.5483942 -2.4945495 0.8785752 -2.0193341 3.5006245 1.8130725 5.0713825 2.0278612 3.7317933 7.2131028
n <- list(10,20,4)
pmap(list(n,mu,sigma),rnorm)
[[1]]
[1] 4.509272 6.182599 5.696206 5.285198 6.005597 3.434809 4.957345 6.709828 4.398375 5.999906
[[2]]
[1] 13.098816 20.457712 10.746364 12.954228 16.622434 2.002508 3.818095 12.747614 8.466130 19.015718 12.322974
[12] 2.501040 7.620018 13.092890 6.295540 18.705108 7.535133 14.868748 8.395464 2.067585
[[3]]
[1] -9.1810444 -23.4396420 -0.7616694 -4.3489155
f <- c("rnorm","runif","rpois")
params <- list(list(mean=5,sd=5),list(min=1,max=10),list(lambda=10))
invoke_map(f,params,n=10)
[[1]]
[1] 9.545631 8.507999 6.656743 1.534669 14.356332 6.906555 1.337314 6.341718 3.252099 12.137012
[[2]]
[1] 1.495842 1.921696 9.472208 7.557108 6.842509 9.649270 8.729320 7.507460 7.324795 8.749119
[[3]]
[1] 12 6 11 8 12 8 8 15 11 9
x <- list(a=1:5,b=3:4,c=5:6)
x
$`a`
[1] 1 2 3 4 5
$b
[1] 3 4
$c
[1] 5 6
df <-enframe(x)
df
df%>%
mutate(smry=map2_chr(name,value,~str_c(.x,":",.y[1])))
df%>%
group_by(name)%>%
mutate(smry=map2_chr(name,unlist(value)[1],str_c,sep=":"))
library(modelr)
library(tidyverse)
library(gapminder)
gapminder
by_country <- gapminder%>%
nest(-country,-continent)
by_country
如何取出data列的第一个元素
by_country$data[[1]]
拟合每个国家模型
#' Fit a straight-line trend of life expectancy against year.
#'
#' @param df A data frame with `lifeExp` and `year` columns.
#' @return The fitted `lm` object for `lifeExp ~ year`.
country_model <- function(df) {
  trend_fit <- lm(lifeExp ~ year, data = df)
  trend_fit
}
by_country <- by_country%>%
mutate(model=map(data,country_model))
by_country
添加残差
by_country <- by_country%>%
mutate(resid=map2(data,model,add_residuals))
by_country
unnest 画出残差图
resid <- by_country%>%
unnest(resid)
resid
使用broom获取模型摘要
library(broom)
by_country%>%
mutate(glance=map(model,glance))%>%
unnest(glance,.drop = TRUE)