# Load the built-in mtcars data set into the workspace.
data("mtcars")
# Print a per-column overview: missing counts, completeness, mean, sd,
# quantiles and a small inline histogram.
skimr::skim(mtcars)
Name | mtcars |
Number of rows | 32 |
Number of columns | 11 |
_______________________ | |
Column type frequency: | |
numeric | 11 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
mpg | 0 | 1 | 20.09 | 6.03 | 10.40 | 15.43 | 19.20 | 22.80 | 33.90 | ▃▇▅▁▂ |
cyl | 0 | 1 | 6.19 | 1.79 | 4.00 | 4.00 | 6.00 | 8.00 | 8.00 | ▆▁▃▁▇ |
disp | 0 | 1 | 230.72 | 123.94 | 71.10 | 120.83 | 196.30 | 326.00 | 472.00 | ▇▃▃▃▂ |
hp | 0 | 1 | 146.69 | 68.56 | 52.00 | 96.50 | 123.00 | 180.00 | 335.00 | ▇▇▆▃▁ |
drat | 0 | 1 | 3.60 | 0.53 | 2.76 | 3.08 | 3.70 | 3.92 | 4.93 | ▇▃▇▅▁ |
wt | 0 | 1 | 3.22 | 0.98 | 1.51 | 2.58 | 3.33 | 3.61 | 5.42 | ▃▃▇▁▂ |
qsec | 0 | 1 | 17.85 | 1.79 | 14.50 | 16.89 | 17.71 | 18.90 | 22.90 | ▃▇▇▂▁ |
vs | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
am | 0 | 1 | 0.41 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
gear | 0 | 1 | 3.69 | 0.74 | 3.00 | 3.00 | 4.00 | 4.00 | 5.00 | ▇▁▆▁▂ |
carb | 0 | 1 | 2.81 | 1.62 | 1.00 | 2.00 | 2.00 | 4.00 | 8.00 | ▇▂▅▁▁ |
# List each cylinder count that occurs in mtcars once.
distinct(mtcars, cyl)
## cyl
## Mazda RX4 6
## Datsun 710 4
## Hornet Sportabout 8
# Import the two Excel workbooks.
# Cells that hold the literal text "NA" are treated as missing (blank cells
# stay missing too); otherwise a numeric column containing such cells is
# imported as character and numeric summaries of it are meaningless.
data <- read_excel("../00_data/MyData-Charts.xlsx", na = c("", "NA"))
# NOTE(review): Datasimpler.xlsx may need the same `na` setting - confirm
# against the workbook.
data_simpler <- read_excel("../00_data/Datasimpler.xlsx")
Case of numeric variables
# Column-wise mean of mtcars, returned as a named list (one entry per column).
# To see how a non-numeric column behaves, add one first, e.g.
# mutate(mtcars, char_var = "A"): mean() then yields NA with a warning for it.
map(.x = mtcars, .f = mean)
## $mpg
## [1] 20.09062
##
## $cyl
## [1] 6.1875
##
## $disp
## [1] 230.7219
##
## $hp
## [1] 146.6875
##
## $drat
## [1] 3.596563
##
## $wt
## [1] 3.21725
##
## $qsec
## [1] 17.84875
##
## $vs
## [1] 0.4375
##
## $am
## [1] 0.40625
##
## $gear
## [1] 3.6875
##
## $carb
## [1] 2.8125
Create your own function
#' Multiply a value by a constant factor
#'
#' @param x Value(s) to scale; the product is computed element-wise.
#' @param factor Multiplier applied to `x`.
#' @return The product `x * factor`.
muliply_by_factor <- function(x, factor) {
  x * factor
}
# Quick check of the helper on a single number.
muliply_by_factor(x = 10, factor = 2)
## [1] 20
# Double every column of mtcars; the result is a named list of vectors.
map(
  .x = mtcars,
  .f = function(col) muliply_by_factor(x = col, factor = 2)
)
## $mpg
## [1] 42.0 42.0 45.6 42.8 37.4 36.2 28.6 48.8 45.6 38.4 35.6 32.8 34.6 30.4 20.8
## [16] 20.8 29.4 64.8 60.8 67.8 43.0 31.0 30.4 26.6 38.4 54.6 52.0 60.8 31.6 39.4
## [31] 30.0 42.8
##
## $cyl
## [1] 12 12 8 12 16 12 16 8 8 12 12 16 16 16 16 16 16 8 8 8 8 16 16 16 16
## [26] 8 8 8 16 12 16 8
##
## $disp
## [1] 320.0 320.0 216.0 516.0 720.0 450.0 720.0 293.4 281.6 335.2 335.2 551.6
## [13] 551.6 551.6 944.0 920.0 880.0 157.4 151.4 142.2 240.2 636.0 608.0 700.0
## [25] 800.0 158.0 240.6 190.2 702.0 290.0 602.0 242.0
##
## $hp
## [1] 220 220 186 220 350 210 490 124 190 246 246 360 360 360 410 430 460 132 104
## [20] 130 194 300 300 490 350 132 182 226 528 350 670 218
##
## $drat
## [1] 7.80 7.80 7.70 6.16 6.30 5.52 6.42 7.38 7.84 7.84 7.84 6.14 6.14 6.14 5.86
## [16] 6.00 6.46 8.16 9.86 8.44 7.40 5.52 6.30 7.46 6.16 8.16 8.86 7.54 8.44 7.24
## [31] 7.08 8.22
##
## $wt
## [1] 5.240 5.750 4.640 6.430 6.880 6.920 7.140 6.380 6.300 6.880
## [11] 6.880 8.140 7.460 7.560 10.500 10.848 10.690 4.400 3.230 3.670
## [21] 4.930 7.040 6.870 7.680 7.690 3.870 4.280 3.026 6.340 5.540
## [31] 7.140 5.560
##
## $qsec
## [1] 32.92 34.04 37.22 38.88 34.04 40.44 31.68 40.00 45.80 36.60 37.80 34.80
## [13] 35.20 36.00 35.96 35.64 34.84 38.94 37.04 39.80 40.02 33.74 34.60 30.82
## [25] 34.10 37.80 33.40 33.80 29.00 31.00 29.20 37.20
##
## $vs
## [1] 0 0 2 2 0 2 0 2 2 2 2 0 0 0 0 0 0 2 2 2 2 0 0 0 0 2 0 2 0 0 0 2
##
## $am
## [1] 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2
##
## $gear
## [1] 8 8 8 6 6 6 6 8 8 8 8 6 6 6 6 6 6 8 8 8 6 6 6 6 6
## [26] 8 10 10 10 10 10 8
##
## $carb
## [1] 8 8 2 2 4 2 8 4 4 8 8 6 6 6 8 8 8 2 4 2 2 4 4 8 4
## [26] 2 4 4 8 12 16 4
# Double every column of mtcars and column-bind the results into one tibble.
map_dfc(
  .x = mtcars,
  .f = function(col) muliply_by_factor(x = col, factor = 2)
)
## # A tibble: 32 × 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42 12 320 220 7.8 5.24 32.9 0 2 8 8
## 2 42 12 320 220 7.8 5.75 34.0 0 2 8 8
## 3 45.6 8 216 186 7.7 4.64 37.2 2 2 8 2
## 4 42.8 12 516 220 6.16 6.43 38.9 2 0 6 2
## 5 37.4 16 720 350 6.3 6.88 34.0 0 0 6 4
## 6 36.2 12 450 210 5.52 6.92 40.4 2 0 6 2
## 7 28.6 16 720 490 6.42 7.14 31.7 0 0 6 8
## 8 48.8 8 293. 124 7.38 6.38 40 2 0 8 4
## 9 45.6 8 282. 190 7.84 6.3 45.8 2 0 8 4
## 10 38.4 12 335. 246 7.84 6.88 36.6 2 0 8 8
## # … with 22 more rows
# Same doubling, this time handing `factor` to the helper through map()'s
# extra arguments instead of wrapping the call in a lambda.
map(.x = mtcars, .f = muliply_by_factor, factor = 2)
## $mpg
## [1] 42.0 42.0 45.6 42.8 37.4 36.2 28.6 48.8 45.6 38.4 35.6 32.8 34.6 30.4 20.8
## [16] 20.8 29.4 64.8 60.8 67.8 43.0 31.0 30.4 26.6 38.4 54.6 52.0 60.8 31.6 39.4
## [31] 30.0 42.8
##
## $cyl
## [1] 12 12 8 12 16 12 16 8 8 12 12 16 16 16 16 16 16 8 8 8 8 16 16 16 16
## [26] 8 8 8 16 12 16 8
##
## $disp
## [1] 320.0 320.0 216.0 516.0 720.0 450.0 720.0 293.4 281.6 335.2 335.2 551.6
## [13] 551.6 551.6 944.0 920.0 880.0 157.4 151.4 142.2 240.2 636.0 608.0 700.0
## [25] 800.0 158.0 240.6 190.2 702.0 290.0 602.0 242.0
##
## $hp
## [1] 220 220 186 220 350 210 490 124 190 246 246 360 360 360 410 430 460 132 104
## [20] 130 194 300 300 490 350 132 182 226 528 350 670 218
##
## $drat
## [1] 7.80 7.80 7.70 6.16 6.30 5.52 6.42 7.38 7.84 7.84 7.84 6.14 6.14 6.14 5.86
## [16] 6.00 6.46 8.16 9.86 8.44 7.40 5.52 6.30 7.46 6.16 8.16 8.86 7.54 8.44 7.24
## [31] 7.08 8.22
##
## $wt
## [1] 5.240 5.750 4.640 6.430 6.880 6.920 7.140 6.380 6.300 6.880
## [11] 6.880 8.140 7.460 7.560 10.500 10.848 10.690 4.400 3.230 3.670
## [21] 4.930 7.040 6.870 7.680 7.690 3.870 4.280 3.026 6.340 5.540
## [31] 7.140 5.560
##
## $qsec
## [1] 32.92 34.04 37.22 38.88 34.04 40.44 31.68 40.00 45.80 36.60 37.80 34.80
## [13] 35.20 36.00 35.96 35.64 34.84 38.94 37.04 39.80 40.02 33.74 34.60 30.82
## [25] 34.10 37.80 33.40 33.80 29.00 31.00 29.20 37.20
##
## $vs
## [1] 0 0 2 2 0 2 0 2 2 2 2 0 0 0 0 0 0 2 2 2 2 0 0 0 0 2 0 2 0 0 0 2
##
## $am
## [1] 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2
##
## $gear
## [1] 8 8 8 6 6 6 6 8 8 8 8 6 6 6 6 6 6 8 8 8 6 6 6 6 6
## [26] 8 10 10 10 10 10 8
##
## $carb
## [1] 8 8 2 2 4 2 8 4 4 8 8 6 6 6 8 8 8 2 4 2 2 4 4 8 4
## [26] 2 4 4 8 12 16 4
When you have a grouping variable (factor)
# Regress mpg on wt separately for each cylinder count and keep the wt
# coefficient (with its confidence interval) from every fit.
reg_coeff_tbl <- mtcars %>%
  # One data frame per distinct value of cyl
  split(f = mtcars$cyl) %>%
  # Fit the same linear model to each piece
  map(function(cyl_df) lm(mpg ~ wt, data = cyl_df)) %>%
  # Turn each fit into a tidy coefficient table including interval bounds
  map(function(fit) broom::tidy(fit, conf.int = TRUE)) %>%
  # Stack the tables, recording the originating group in column "cyl"
  bind_rows(.id = "cyl") %>%
  # Keep only the slope on wt
  filter(term == "wt")
# Plot the wt estimate of each cylinder group with a horizontal bar spanning
# conf.low to conf.high; the colour legend is hidden.
ggplot(data = reg_coeff_tbl, mapping = aes(x = estimate, y = cyl)) +
  geom_point(mapping = aes(color = cyl), size = 3) +
  geom_errorbarh(mapping = aes(xmin = conf.low, xmax = conf.high)) +
  theme(legend.position = "none")
Choose either one of the two cases above and apply it to your data
# Largest value of every column in the imported data, as a named list.
# Missing values are dropped before taking the maximum; without na.rm = TRUE
# a single NA makes max() return NA for the whole column. A column that is
# entirely NA yields -Inf with a warning.
data %>%
  map(max, na.rm = TRUE) # map(.x = ., .f = max, na.rm = TRUE)
## $year
## [1] 2021
##
## $months
## [1] "October-December"
##
## $state
## [1] "Wyoming"
##
## $colony_n
## [1] NA
##
## $colony_max
## [1] "NA"
##
## $colony_lost
## [1] NA
##
## $colony_lost_pct
## [1] NA
##
## $colony_added
## [1] "NA"
##
## $colony_reno
## [1] "NA"
##
## $colony_reno_pct
## [1] "NA"
##
## $`Growth of colonies`
## [1] NA
Create your own function
#' Multiply a value by a constant factor
#'
#' @param x Value(s) to scale; the product is computed element-wise.
#' @param factor Multiplier applied to `x`.
#' @return The product `x * factor`.
muliply_by_factor <- function(x, factor) {
  x * factor
}
# Quick check of the helper on a single number.
muliply_by_factor(x = 10, factor = 2)
## [1] 20