“Writing code that is easy to read is really important because every single project that you do involves collaboration between at least two people: present you and future you. Future you will be very grateful to present you if you write code that’s easy to understand.” — Hadley Wickham
| keyboard_command | Description |
|---|---|
| Ctrl + Shift + k | Knit document |
| Ctrl + Alt + I | Create code block |
| Shift Alt + | Turn csv into chr vector |
| rm (list=ls()) | Remove all variables in rstudio |
| Ctrl + Shift + Enter | Run Current Block |
| Ctrl + Shift + S | Run all blocks |
| Home/End + Shift Home/End | Navigate to beginning/end of line then select whole line |
| Alt + Enter | Run current highlighted text/current line |
| Ctrl + Shift + M | Insert the pipe operator (%>%) |
link to article with Barecombine description
#devtools::install_github("hrbrmstr/hrbraddins")
#c This csv line now, a, character, vector
#vector_paste()
rm(list=ls())
“A functional is a function that takes a function as an input and returns a vector as an output.” — Advanced R, Hadley Wickham
#Lapply
##functional(object, function)
Uwe’s Maxim: “Computers are cheap, and thinking hurts.”
Aesthetics
Time saved writing code
Minimization of input error
Faster code
The idea for this example was taken from Hadley Wickham’s Advanced R.
# Build a reproducible 4 x 4 data frame of random integers drawn from 1 to 10,
# with columns named a through d.
set.seed(10)
# `replace` is spelled out in full: the earlier `rep = TRUE` only worked
# through partial argument matching. `1:10` needs no c() wrapper.
df <- data.frame(replicate(4, sample(1:10, 4, replace = TRUE)))
names(df) <- letters[1:4]
df## a b c d
## 1 6 1 7 2
## 2 4 3 5 6
## 3 5 3 7 4
## 4 7 3 6 5
no_functions_df <- df
## This is a lot of typing, a lot of pasting, and overall very prone to mistakes:
## each line names its column twice, so every pasted copy needs two manual edits.
no_functions_df$a[no_functions_df$a == 5] <- 2
no_functions_df$b[no_functions_df$b == 5] <- 2
no_functions_df$c[no_functions_df$c == 5] <- 2
no_functions_df$d[no_functions_df$d == 5] <- 2
no_functions_df## a b c d
## 1 6 1 7 2
## 2 4 3 2 6
## 3 2 3 7 4
## 4 7 3 6 2
# We could write a function
# and call it on each column. This is a little less code, a little cleaner, and less prone to error.
function_df <- df
# Return a copy of `x` in which every element equal to 5 has been set to 2.
# All other elements are left as they are.
replace_5s <- function(x) {
  is_five <- x == 5
  replace(x, is_five, 2)
}
# Apply the helper to each column in turn; the column name still has to be
# typed twice per line, but the replacement rule itself is written only once.
function_df$a <- replace_5s(function_df$a)
function_df$b <- replace_5s(function_df$b)
function_df$c <- replace_5s(function_df$c)
function_df$d <- replace_5s(function_df$d)
#with_function_df
# Better still, hand the helper to a functional: lapply() runs replace_5s on
# every column, and assigning into `[]` keeps the data frame structure.
heres_the_winner <- df
heres_the_winner[] <- lapply(df, replace_5s)
#str(heres_the_winner)
# Example of how Map works:
# let's say you have measurement columns you want to standardize.
# Map() calls `*` on each column paired with a multiplier; the two multipliers
# in c(10, 100) are recycled across the four columns (a * 10, b * 100, c * 10, d * 100),
# and the result is a list with one element per column.
mapped<- Map("*",heres_the_winner,c(10,100))
str(mapped)## List of 4
## $ a: num [1:4] 60 40 20 70
## $ b: num [1:4] 100 300 300 300
## $ c: num [1:4] 70 20 70 60
## $ d: num [1:4] 200 600 400 200
I encourage you to take a look at this article referenced earlier
# for (x in seq_along(1:10000000)){
# mylist[x] <- append (mylist,x)
# }
# Load the contents of the file into a single string
mystring <- read_file("tournamentinfo.txt")
# Extract every match of the pattern: a literal | followed by any run of
# characters (`.*`), or the end of the string (`$`)
trial <- unlist(str_extract_all(mystring,"\\|.*|$"))
# Split each extracted piece on |, keeping only elements 3 through 131
# NOTE(review): presumably elements 1-2 are header rows — confirm against the file
splited_trial <- str_split(trial[3:131],"\\|")
splited_trial[1:2]## [[1]]
## [1] ""
## [2] " GARY HUA "
## [3] "6.0 "
## [4] "W 39"
## [5] "W 21"
## [6] "W 18"
## [7] "W 14"
## [8] "W 7"
## [9] "D 12"
## [10] "D 4"
## [11] ""
##
## [[2]]
## [1] ""
## [2] " 15445895 / R: 1794 ->1817 "
## [3] "N:2 "
## [4] "W "
## [5] "B "
## [6] "W "
## [7] "B "
## [8] "W "
## [9] "B "
## [10] "W "
## [11] ""
# We want the evens from splitted trial below, lets try for loop
# First attempt: walk every record and pull the number that follows ":" out of
# the second field of each even-numbered one.
# The result vector is sized from the data rather than a hard-coded 63: the
# loop writes one value per even index (64 in the printed result), so the old
# vector was one slot short and had to be grown on the final assignment.
start_ranks <- rep(NA_character_, length(splited_trial) %/% 2)
#tracemem(start_ranks)
# seq_along() is safe for an empty list, where 1:length() would yield c(1, 0).
for (x in seq_along(splited_trial)) {
  if (x %% 2 == 0) {
    my_ranks <- splited_trial[[x]][2]
    # Match ":" + whitespace + digits, then split on the whitespace and keep
    # the second piece (the digits).
    rank_match <- str_extract(my_ranks, ":\\s+[[:digit:]]+")
    start_ranks[x / 2] <- unlist(str_split(rank_match, "\\s+"))[2]
  }
}
start_ranks## [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980" "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377" "1362" "1382" "1291" "1056"
## [51] "1011" "935" "1393" "1270" "1186" "1153" "1092" "917" "853" "967"
## [61] "955" "1530" "1175" "1163"
# Second attempt: select the even-numbered records up front so the loop body
# no longer needs the modulo test.
trial <- unlist(str_extract_all(mystring, "\\|.*|$"))
splited_trial <- str_split(trial[3:131], "\\|")
# NOTE(review): `to = 127` selects 63 records, while the first loop's printed
# result has 64 entries — confirm whether the last record should be included.
evens <- splited_trial[seq(from = 2, to = 127, by = 2)]
start_ranks <- rep(NA_character_, length(evens))
#tracemem(start_ranks)
# seq_along() is safe for an empty list, where 1:length() would yield c(1, 0).
for (x in seq_along(evens)) {
  my_ranks <- evens[[x]][2]
  # unlist() replaces the deprecated as_data_frame() wrapper, matching the
  # copy of this loop that is benchmarked later in the document.
  rank_match <- unlist(str_extract(my_ranks, ":\\s+[[:digit:]]+"))
  start_ranks[x] <- unlist(str_split(rank_match, "\\s+"))[2]
}

# Vectorized attempt: no explicit loop. Take the second field of every even
# record, then run the extract and the split once over the whole collection.
trial <- unlist(str_extract_all(mystring, "\\|.*|$"))
splited_trial <- str_split(trial[3:131], "\\|")
evens <- splited_trial[seq(from = 2, to = 127, by = 2)]
vector_ranks <- lapply(evens, `[[`, 2)
#tracemem(vector_ranks)
vector_ranks <- unlist(str_split(unlist(str_extract(vector_ranks, ":\\s+[[:digit:]]+")), "\\s+"))
# Each split produces a ":" piece followed by the digits, so after unlist()
# the digits sit at the even positions.
vector_ranks <- vector_ranks[seq(from = 2, to = 127, by = 2)]
vector_ranks## [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980" "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377" "1362" "1382" "1291" "1056"
## [51] "1011" "935" "1393" "1270" "1186" "1153" "1092" "917" "853" "967"
## [61] "955" "1530" "1175"
#tracemem(mystring)
# The vectorized steps again, written as a single pipe chain with one
# transformation per line: extract the "|..." pieces, drop the first two,
# split on "|", keep the even records, take their second field, pull out
# ":" + digits, split on whitespace, and keep the digits.
piped_Vectorization <- mystring %>%
  str_extract_all("\\|.*|$") %>%
  unlist() %>%
  .[-(1:2)] %>%
  str_split("\\|") %>%
  .[seq(from = 2, to = 127, by = 2)] %>%
  lapply(`[[`, 2) %>%
  str_extract(":\\s+[[:digit:]]+") %>%
  str_split("\\s+") %>%
  lapply(`[[`, 2) %>%
  unlist()
piped_Vectorization ## [1] "1794" "1553" "1384" "1716" "1655" "1686" "1649" "1641" "1411" "1365"
## [11] "1712" "1663" "1666" "1610" "1220" "1604" "1629" "1600" "1564" "1595"
## [21] "1563" "1555" "1363" "1229" "1745" "1579" "1552" "1507" "1602" "1522"
## [31] "1494" "1441" "1449" "1399" "1438" "1355" "980" "1423" "1436" "1348"
## [41] "1403" "1332" "1283" "1199" "1242" "377" "1362" "1382" "1291" "1056"
## [51] "1011" "935" "1393" "1270" "1186" "1153" "1092" "917" "853" "967"
## [61] "955" "1530" "1175"
# Time the four approaches against each other. Each named expression passed to
# benchmark() is evaluated `replications = 200` times, and only the columns
# listed in `columns` are kept in the result. The four expressions are, in
# order: the loop with a modulo test, the loop over pre-selected even records,
# the vectorized version, and the vectorized version written as a pipe chain.
# NOTE(review): benchmark() is presumably rbenchmark::benchmark — confirm the package is loaded.
final_results <- benchmark("FIRST CODE "={
# Loop over every record and test each index for evenness.
trial <- unlist(str_extract_all(mystring,"\\|.*|$"))
splited_trial <- str_split(trial[3:131],"\\|")
start_ranks <- c(rep(NA,63))
for (x in 1:length(splited_trial)){
if (x%%2==0){
my_ranks<-splited_trial[[x]][2]
start_ranks[x/2] <- unlist(str_split(unlist(str_extract(my_ranks,":\\s+[[:digit:]]+")),"\\s+"))[2]
}
}
}, " SECOND CODE"={
# Select the even records first, then loop over just those.
trial <- unlist(str_extract_all(mystring,"\\|.*|$"))
splited_trial <- str_split(trial[3:131],"\\|")
start_ranks_2 <- rep(NA,63)
evens <-splited_trial[seq(from=2,to=127,by=2)]
for (x in 1:length(evens)){
my_ranks<-evens[[x]][2]
start_ranks_2[x] <- unlist(str_split(unlist(str_extract(my_ranks,":\\s+[[:digit:]]+")),"\\s+"))[2]
}
}," VECTORIZED CODE"={
# No explicit loop: extract and split once over all even records.
trial <- unlist(str_extract_all(mystring,"\\|.*|$"))
splited_trial <- str_split(trial[3:131],"\\|")
evens <-splited_trial[seq(from=2,to=127,by=2)]
vector_ranks <- lapply(evens, `[[`, 2)
vector_ranks <-unlist(str_split(unlist(str_extract(vector_ranks,":\\s+[[:digit:]]+")),"\\s+"))
vector_ranks <-vector_ranks[seq(from=2,to=127,by=2)]
}," VECTORIZED PIPED CODE "={piped_Vectorization<- str_extract_all(mystring,"\\|.*|$") %>%
unlist() %>%
.[-(1:2)]%>%
str_split(.,"\\|") %>%
.[seq(from=2,to=127,by=2)] %>%
lapply(., `[[`, 2) %>%
str_extract(.,":\\s+[[:digit:]]+") %>%
str_split(.,"\\s+") %>%
lapply(., `[[`, 2) %>%
unlist(.)
},replications = 200,
columns = c("test", "replications", "elapsed",
"relative", "user.self", "sys.self"))
# Tidy the benchmark table for display.
# as_tibble() replaces as_data_frame(), which tibble has deprecated.
final_results <- as_tibble(final_results)
colnames(final_results) <- c(
  "Code Progression", "Replications", "Total_Time",
  "relative", "user.self", "sys.self"
)
# Slowest approach first, so the table reads in order of improvement.
final_results <- arrange(final_results, desc(Total_Time))
kable(final_results)

| Code Progression | Replications | Total_Time | relative | user.self | sys.self |
|---|---|---|---|---|---|
| FIRST CODE | 200 | 1.87 | 11.687 | 1.87 | 0 |
| SECOND CODE | 200 | 1.60 | 10.000 | 1.61 | 0 |
| VECTORIZED PIPED CODE | 200 | 0.20 | 1.250 | 0.20 | 0 |
| VECTORIZED CODE | 200 | 0.16 | 1.000 | 0.16 | 0 |