# Collect the CSV file names in the simulated-data directory. `pattern` is a
# regular expression, not a shell glob, so match the literal ".csv" suffix
# instead of the loose "*.csv" (which also matches names such as "x.csv.bak").
files <- list.files("simulated_data_from_1990_to_2005", pattern = "\\.csv$")
# Loop solution: read every file into a list keyed by file name, and time it
all_csv <- list()
init_time <- Sys.time()
for (file in files) {
  all_csv[[file]] <- fread(
    file.path("simulated_data_from_1990_to_2005", file),
    data.table = FALSE
  )
}
# Auto-prints the elapsed time of the loop
Sys.time() - init_time
## Time difference of 0.2639539 secs
## loop function time: 0.268939
# Purrr solution: read every file with a single map() call, and time it
init_time <- Sys.time()
all_csv_purrr <- map(
  file.path("simulated_data_from_1990_to_2005", files),
  fread
)
# cat() drops the unit of a difftime, so pin the unit to seconds explicitly;
# otherwise a long run would silently be reported in minutes.
cat(
  "map function time: ",
  as.numeric(difftime(Sys.time(), init_time, units = "secs"))
)
## map function time: 0.03615785
## $years
## [1] 1990
##
## $a
## [1] 4.920349
##
## $b
## [1] 201.0297
# Summarise the first data frame column by column: the column mean, and
# whether that mean is greater than 5
data.frame(
  average = map_dbl(all_csv[[1]], mean),
  logical = map_lgl(all_csv[[1]], function(column) mean(column) > 5)
)
## average logical
## years 1990.000000 TRUE
## a 4.920349 FALSE
## b 201.029747 TRUE
# Load sw_films and name each element after its own "title" field, so that
# later output is keyed by film title instead of by position
data("sw_films")
sw_films <- set_names(
  sw_films,
  map_chr(sw_films, function(film) film[["title"]])
)
# Counts recorded at four Lake Erie sites
waterfowl_data <- list(
  LakeErieS = c(0, 0, 10, 5),
  LakeErieN = c(0, 0, 1000, 5),
  LakeErieW = c(10000, 0, 0, 1),
  LakeErieE = c(10, 10, 5, 0)
)
# For every site, total the counts and take the natural log of that total
map(waterfowl_data, function(counts) log(sum(counts)))
## $LakeErieS
## [1] 2.70805
##
## $LakeErieN
## [1] 6.912743
##
## $LakeErieW
## [1] 9.21044
##
## $LakeErieE
## [1] 3.218876
# The three sites: north, east, and west
sites <- list("north", "east", "west")
# Build one data frame per site with a name column plus two simulated
# columns: a ~ N(5, 2.5) and b ~ N(200, 15), 200 draws each
list_of_df <- map(sites, function(site) {
  data.frame(
    name = site,
    a = rnorm(n = 200, mean = 5, sd = 5 / 2),
    b = rnorm(n = 200, mean = 200, sd = 15)
  )
})
# Fit a ~ b on every data frame and print each model summary in one pass.
# The data argument stays `.` so the Call shown in each summary is unchanged.
list_of_df %>%
  map(~ summary(lm(a ~ b, data = .)))
## [[1]]
##
## Call:
## lm(formula = a ~ b, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.3707 -1.4360 0.1539 1.5372 8.0580
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.60056 2.77597 1.657 0.099 .
## b 0.00242 0.01378 0.176 0.861
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.711 on 198 degrees of freedom
## Multiple R-squared: 0.0001557, Adjusted R-squared: -0.004894
## F-statistic: 0.03084 on 1 and 198 DF, p-value: 0.8608
##
##
## [[2]]
##
## Call:
## lm(formula = a ~ b, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.5307 -1.6329 -0.0207 1.5045 7.2908
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.084166 2.244388 2.711 0.0073 **
## b -0.005489 0.011307 -0.485 0.6279
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.388 on 198 degrees of freedom
## Multiple R-squared: 0.001189, Adjusted R-squared: -0.003855
## F-statistic: 0.2357 on 1 and 198 DF, p-value: 0.6279
##
##
## [[3]]
##
## Call:
## lm(formula = a ~ b, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.3305 -1.5904 0.1304 1.7518 8.9316
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.947289 2.191565 1.801 0.0732 .
## b 0.004746 0.010934 0.434 0.6647
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.569 on 198 degrees of freedom
## Multiple R-squared: 0.0009506, Adjusted R-squared: -0.004095
## F-statistic: 0.1884 on 1 and 198 DF, p-value: 0.6647
# Pull out the director element of sw_films, first as a list ...
map(sw_films, "director")
# ... and then as a named character vector (this second call was missing even
# though the comment above and the recorded output show both forms)
map_chr(sw_films, "director")
## $`A New Hope`
## [1] "George Lucas"
##
## $`Attack of the Clones`
## [1] "George Lucas"
##
## $`The Phantom Menace`
## [1] "George Lucas"
##
## $`Revenge of the Sith`
## [1] "George Lucas"
##
## $`Return of the Jedi`
## [1] "Richard Marquand"
##
## $`The Empire Strikes Back`
## [1] "Irvin Kershner"
##
## $`The Force Awakens`
## [1] "J. J. Abrams"
## A New Hope Attack of the Clones The Phantom Menace
## "George Lucas" "George Lucas" "George Lucas"
## Revenge of the Sith Return of the Jedi The Empire Strikes Back
## "George Lucas" "Richard Marquand" "Irvin Kershner"
## The Force Awakens
## "J. J. Abrams"
# Compare outputs when checking if director is George Lucas:
# map() returns a list of single logicals ...
map(sw_films, function(film) film[["director"]] == "George Lucas")
# ... while map_lgl() returns one named logical vector (this second call was
# missing even though the recorded output shows both forms)
map_lgl(sw_films, function(film) film[["director"]] == "George Lucas")
## $`A New Hope`
## [1] TRUE
##
## $`Attack of the Clones`
## [1] TRUE
##
## $`The Phantom Menace`
## [1] TRUE
##
## $`Revenge of the Sith`
## [1] TRUE
##
## $`Return of the Jedi`
## [1] FALSE
##
## $`The Empire Strikes Back`
## [1] FALSE
##
## $`The Force Awakens`
## [1] FALSE
## A New Hope Attack of the Clones The Phantom Menace
## TRUE TRUE TRUE
## Revenge of the Sith Return of the Jedi The Empire Strikes Back
## TRUE FALSE FALSE
## The Force Awakens
## FALSE
# Means 1, 2 and 3, one per site
means <- list(1, 2, 3)
# Sites, in the same order as the means they are paired with
sites <- list("north", "west", "east")
# Walk over sites and means in parallel: each pair yields a data frame with
# the site label and 200 normal draws centred on that site's mean
list_of_files_map2 <- map2(sites, means, function(site, mu) {
  data.frame(
    sites = site,
    a = rnorm(n = 200, mean = mu, sd = 5 / 2)
  )
})
# Show the first ten rows of the first data frame
head(list_of_files_map2[[1]], 10)
## sites a
## 1 north 0.379258658
## 2 north 1.411342440
## 3 north 6.116933682
## 4 north -0.166818119
## 5 north -0.005043385
## 6 north 2.423186565
## 7 north 0.240921771
## 8 north -1.452356184
## 9 north -0.182885834
## 10 north 0.617260259
# Parameters for the simulation: means, standard deviations, sample sizes
list_of_means <- list(5, 2, 10, 15)
list_if_sd <- list(0.6, 0.1, 3, 4)
list_of_smpls <- list(50, 100, 200, 250)
# Loop solution: one normal sample per (mean, sd, sample size) combination.
# The result list is preallocated, and the running index `num` is set once
# before the loops. Previously `num` was reset to 1 on every iteration and the
# write always went to sim_data[[1]], so each sample overwrote the last one.
sim_data <- vector(
  "list",
  length(list_of_means) * length(list_if_sd) * length(list_of_smpls)
)
num <- 1L
for (i in list_of_means) {
  for (j in list_if_sd) {
    for (k in list_of_smpls) {
      sim_data[[num]] <- rnorm(n = k, mean = i, sd = j)
      num <- num + 1L
    }
  }
}
# purrr solution: bundle the three parameter lists and let pmap() walk them
# in parallel, matching them positionally to means, sd and samplesize
input_list <- list(list_of_means, list_if_sd, list_of_smpls)
sim_data <- pmap(input_list, function(means, sd, samplesize) {
  data.frame(a = rnorm(n = samplesize, mean = means, sd = sd))
})
## Warning in .f(...): Se han producido NaNs
## [[1]]
## [[1]]$result
## [1] NaN
##
## [[1]]$error
## NULL
##
##
## [[2]]
## [[2]]$result
## [1] 0
##
## [[2]]$error
## NULL
##
##
## [[3]]
## [[3]]$result
## [1] 2.302585
##
## [[3]]$error
## NULL
##
##
## [[4]]
## [[4]]$result
## [1] -Inf
##
## [[4]]$error
## NULL
## NULL
## NULL
# Take the log of each element in the list. possibly() substitutes NA_real_
# only when log() raises an error; a warning still returns log()'s own value.
a <- map(
  list(-10, 1, 10, 0),
  possibly(function(x) log(x), otherwise = NA_real_)
)
## Warning in log(x): Se han producido NaNs
# Load the gap_split data
data(gap_split)
# Build a year vs. lifeExp line chart for each of the first 10 elements of
# gap_split, using the element's name as the chart title
plots <- map2(
  gap_split[1:10],
  names(gap_split[1:10]),
  function(panel_data, panel_name) {
    ggplot(panel_data, aes(year, lifeExp)) +
      geom_line() +
      labs(title = panel_name)
  }
)
# walk() takes the object first and the function second; it is used here
# purely for the side effect of printing every plot
walk(plots, print)
## final example
# Turn data into correct dataframe format: one row per (film title, character)
# pair. `cols` is passed explicitly because calling unnest() without it is
# deprecated and warns; the redundant bare `filmtitle` argument to mutate()
# has been dropped.
film_by_character <- tibble(filmtitle = map_chr(sw_films, "title")) %>%
  mutate(characters = map(sw_films, "characters")) %>%
  tidyr::unnest(cols = characters)
# From every sw_people entry keep only height, mass, name and url, and stack
# the results into a single data frame
sw_characters <- map_df(
  sw_people,
  function(person) person[c("height", "mass", "name", "url")]
)
# Join the two new objects: the "characters" column of film_by_character is
# matched against the "url" column of sw_characters. Height and mass are then
# coerced to numbers; values that are not numeric become NA with a warning.
character_data <- film_by_character %>%
  inner_join(sw_characters, by = c("characters" = "url")) %>%
  mutate(
    height = as.numeric(height),
    mass = as.numeric(mass)
  )
## Warning: NAs introducidos por coerción
## Warning: NAs introducidos por coerción
# Plot the heights, faceted by film title. geom_bar() uses the count stat by
# default, so it draws the same layer as geom_histogram(stat = "count") but
# without the "Ignoring unknown parameters: binwidth, bins, pad" warning.
ggplot(character_data, aes(x = height)) +
  geom_bar() +
  facet_wrap(~ filmtitle)
## Warning: Ignoring unknown parameters: binwidth, bins, pad
## Warning: Removed 6 rows containing non-finite values (stat_count).