# Load the built-in mtcars data set into the workspace.
data(mtcars)
# Print a per-column overview (missing counts, mean, sd, quantiles, histogram).
skimr::skim(mtcars)
Name | mtcars |
Number of rows | 32 |
Number of columns | 11 |
_______________________ | |
Column type frequency: | |
numeric | 11 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
mpg | 0 | 1 | 20.09 | 6.03 | 10.40 | 15.43 | 19.20 | 22.80 | 33.90 | ▃▇▅▁▂ |
cyl | 0 | 1 | 6.19 | 1.79 | 4.00 | 4.00 | 6.00 | 8.00 | 8.00 | ▆▁▃▁▇ |
disp | 0 | 1 | 230.72 | 123.94 | 71.10 | 120.83 | 196.30 | 326.00 | 472.00 | ▇▃▃▃▂ |
hp | 0 | 1 | 146.69 | 68.56 | 52.00 | 96.50 | 123.00 | 180.00 | 335.00 | ▇▇▆▃▁ |
drat | 0 | 1 | 3.60 | 0.53 | 2.76 | 3.08 | 3.70 | 3.92 | 4.93 | ▇▃▇▅▁ |
wt | 0 | 1 | 3.22 | 0.98 | 1.51 | 2.58 | 3.33 | 3.61 | 5.42 | ▃▃▇▁▂ |
qsec | 0 | 1 | 17.85 | 1.79 | 14.50 | 16.89 | 17.71 | 18.90 | 22.90 | ▃▇▇▂▁ |
vs | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
am | 0 | 1 | 0.41 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
gear | 0 | 1 | 3.69 | 0.74 | 3.00 | 3.00 | 4.00 | 4.00 | 5.00 | ▇▁▆▁▂ |
carb | 0 | 1 | 2.81 | 1.62 | 1.00 | 2.00 | 2.00 | 4.00 | 8.00 | ▇▂▅▁▁ |
# List the distinct values of the cyl column.
mtcars %>%
  distinct(cyl)
## cyl
## Mazda RX4 6
## Datsun 710 4
## Hornet Sportabout 8
# Download the TidyTuesday (2022-11-29) World Cup matches CSV into a tibble.
myData <- readr::read_csv(
  'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-29/wcmatches.csv'
)
## Rows: 900 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): country, city, stage, home_team, away_team, outcome, win_conditio...
## dbl (3): year, home_score, away_score
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Case of numeric variables
# Compute the mean of every column; map() returns one list element per column.
mtcars %>%
  # mutate(char_var = "A") %>%
  map(.f = mean) # shorthand for map(.x = ., .f = mean)
## $mpg
## [1] 20.09062
##
## $cyl
## [1] 6.1875
##
## $disp
## [1] 230.7219
##
## $hp
## [1] 146.6875
##
## $drat
## [1] 3.596563
##
## $wt
## [1] 3.21725
##
## $qsec
## [1] 17.84875
##
## $vs
## [1] 0.4375
##
## $am
## [1] 0.40625
##
## $gear
## [1] 3.6875
##
## $carb
## [1] 2.8125
Create your own function
# Scale a value by a multiplier.
#
# x:      value(s) to scale.
# factor: multiplier applied to `x` with `*`.
# Returns `x * factor`.
multiply_by_factor <- function(x, factor) {
  x * factor
}
# Quick check: pipe a scalar into the helper as its first argument.
10 %>%
  multiply_by_factor(factor = 2)
## [1] 20
# Double every column, naming each argument explicitly in a formula lambda.
mtcars %>%
  map(
    .x = .,
    .f = ~ multiply_by_factor(x = .x, factor = 2)
  )
## $mpg
## [1] 42.0 42.0 45.6 42.8 37.4 36.2 28.6 48.8 45.6 38.4 35.6 32.8 34.6 30.4 20.8
## [16] 20.8 29.4 64.8 60.8 67.8 43.0 31.0 30.4 26.6 38.4 54.6 52.0 60.8 31.6 39.4
## [31] 30.0 42.8
##
## $cyl
## [1] 12 12 8 12 16 12 16 8 8 12 12 16 16 16 16 16 16 8 8 8 8 16 16 16 16
## [26] 8 8 8 16 12 16 8
##
## $disp
## [1] 320.0 320.0 216.0 516.0 720.0 450.0 720.0 293.4 281.6 335.2 335.2 551.6
## [13] 551.6 551.6 944.0 920.0 880.0 157.4 151.4 142.2 240.2 636.0 608.0 700.0
## [25] 800.0 158.0 240.6 190.2 702.0 290.0 602.0 242.0
##
## $hp
## [1] 220 220 186 220 350 210 490 124 190 246 246 360 360 360 410 430 460 132 104
## [20] 130 194 300 300 490 350 132 182 226 528 350 670 218
##
## $drat
## [1] 7.80 7.80 7.70 6.16 6.30 5.52 6.42 7.38 7.84 7.84 7.84 6.14 6.14 6.14 5.86
## [16] 6.00 6.46 8.16 9.86 8.44 7.40 5.52 6.30 7.46 6.16 8.16 8.86 7.54 8.44 7.24
## [31] 7.08 8.22
##
## $wt
## [1] 5.240 5.750 4.640 6.430 6.880 6.920 7.140 6.380 6.300 6.880
## [11] 6.880 8.140 7.460 7.560 10.500 10.848 10.690 4.400 3.230 3.670
## [21] 4.930 7.040 6.870 7.680 7.690 3.870 4.280 3.026 6.340 5.540
## [31] 7.140 5.560
##
## $qsec
## [1] 32.92 34.04 37.22 38.88 34.04 40.44 31.68 40.00 45.80 36.60 37.80 34.80
## [13] 35.20 36.00 35.96 35.64 34.84 38.94 37.04 39.80 40.02 33.74 34.60 30.82
## [25] 34.10 37.80 33.40 33.80 29.00 31.00 29.20 37.20
##
## $vs
## [1] 0 0 2 2 0 2 0 2 2 2 2 0 0 0 0 0 0 2 2 2 2 0 0 0 0 2 0 2 0 0 0 2
##
## $am
## [1] 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2
##
## $gear
## [1] 8 8 8 6 6 6 6 8 8 8 8 6 6 6 6 6 6 8 8 8 6 6 6 6 6
## [26] 8 10 10 10 10 10 8
##
## $carb
## [1] 8 8 2 2 4 2 8 4 4 8 8 6 6 6 8 8 8 2 4 2 2 4 4 8 4
## [26] 2 4 4 8 12 16 4
# Same doubling, but column-bind the per-column results into a single tibble.
mtcars %>%
  map_dfc(
    .x = .,
    .f = ~ multiply_by_factor(x = .x, factor = 2)
  )
## # A tibble: 32 × 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42 12 320 220 7.8 5.24 32.9 0 2 8 8
## 2 42 12 320 220 7.8 5.75 34.0 0 2 8 8
## 3 45.6 8 216 186 7.7 4.64 37.2 2 2 8 2
## 4 42.8 12 516 220 6.16 6.43 38.9 2 0 6 2
## 5 37.4 16 720 350 6.3 6.88 34.0 0 0 6 4
## 6 36.2 12 450 210 5.52 6.92 40.4 2 0 6 2
## 7 28.6 16 720 490 6.42 7.14 31.7 0 0 6 8
## 8 48.8 8 293. 124 7.38 6.38 40 2 0 8 4
## 9 45.6 8 282. 190 7.84 6.3 45.8 2 0 8 4
## 10 38.4 12 335. 246 7.84 6.88 36.6 2 0 8 8
## # ℹ 22 more rows
# Shortest form: pass the function itself and let map() forward `factor`.
mtcars %>%
  map(.f = multiply_by_factor, factor = 2)
## $mpg
## [1] 42.0 42.0 45.6 42.8 37.4 36.2 28.6 48.8 45.6 38.4 35.6 32.8 34.6 30.4 20.8
## [16] 20.8 29.4 64.8 60.8 67.8 43.0 31.0 30.4 26.6 38.4 54.6 52.0 60.8 31.6 39.4
## [31] 30.0 42.8
##
## $cyl
## [1] 12 12 8 12 16 12 16 8 8 12 12 16 16 16 16 16 16 8 8 8 8 16 16 16 16
## [26] 8 8 8 16 12 16 8
##
## $disp
## [1] 320.0 320.0 216.0 516.0 720.0 450.0 720.0 293.4 281.6 335.2 335.2 551.6
## [13] 551.6 551.6 944.0 920.0 880.0 157.4 151.4 142.2 240.2 636.0 608.0 700.0
## [25] 800.0 158.0 240.6 190.2 702.0 290.0 602.0 242.0
##
## $hp
## [1] 220 220 186 220 350 210 490 124 190 246 246 360 360 360 410 430 460 132 104
## [20] 130 194 300 300 490 350 132 182 226 528 350 670 218
##
## $drat
## [1] 7.80 7.80 7.70 6.16 6.30 5.52 6.42 7.38 7.84 7.84 7.84 6.14 6.14 6.14 5.86
## [16] 6.00 6.46 8.16 9.86 8.44 7.40 5.52 6.30 7.46 6.16 8.16 8.86 7.54 8.44 7.24
## [31] 7.08 8.22
##
## $wt
## [1] 5.240 5.750 4.640 6.430 6.880 6.920 7.140 6.380 6.300 6.880
## [11] 6.880 8.140 7.460 7.560 10.500 10.848 10.690 4.400 3.230 3.670
## [21] 4.930 7.040 6.870 7.680 7.690 3.870 4.280 3.026 6.340 5.540
## [31] 7.140 5.560
##
## $qsec
## [1] 32.92 34.04 37.22 38.88 34.04 40.44 31.68 40.00 45.80 36.60 37.80 34.80
## [13] 35.20 36.00 35.96 35.64 34.84 38.94 37.04 39.80 40.02 33.74 34.60 30.82
## [25] 34.10 37.80 33.40 33.80 29.00 31.00 29.20 37.20
##
## $vs
## [1] 0 0 2 2 0 2 0 2 2 2 2 0 0 0 0 0 0 2 2 2 2 0 0 0 0 2 0 2 0 0 0 2
##
## $am
## [1] 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 2 2 2 2 2 2 2
##
## $gear
## [1] 8 8 8 6 6 6 6 8 8 8 8 6 6 6 6 6 6 8 8 8 6 6 6 6 6
## [26] 8 10 10 10 10 10 8
##
## $carb
## [1] 8 8 2 2 4 2 8 4 4 8 8 6 6 6 8 8 8 2 4 2 2 4 4 8 4
## [26] 2 4 4 8 12 16 4
When you have a grouping variable (factor)
# Fit mpg ~ wt separately for each value of cyl and keep only the wt term.
reg_coeff_tbl <- mtcars %>%
  # One data frame per distinct value of cyl
  split(.$cyl) %>%
  # Fit the same linear model to every piece
  map(~ lm(mpg ~ wt, data = .x)) %>%
  # Turn each fit into a tibble of coefficients with confidence bounds
  map(~ broom::tidy(.x, conf.int = TRUE)) %>%
  # Stack the tibbles, recording the originating list name in `cyl`
  bind_rows(.id = "cyl") %>%
  # Keep just the rows for the wt coefficient
  filter(term == "wt")
# Plot the sign-flipped wt coefficient for each cyl group with its interval.
reg_coeff_tbl %>%
  # Negate the estimate and both interval bounds.
  # NOTE(review): after negation conf.low holds the numerically larger bound;
  # the bar spans the same range either way.
  mutate(
    estimate = -estimate,
    conf.low = -conf.low,
    conf.high = -conf.high
  ) %>%
  ggplot(aes(x = estimate, y = cyl)) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
  theme(legend.position = "none")
Choose one of the two cases above and apply it to your data
# Keep the 14 listed columns and drop every row with a missing value in any
# of them, then print the result.
myData1 <- na.omit(
  myData[, c(
    "year", "country", "city", "stage", "home_team", "away_team",
    "home_score", "away_score", "outcome", "winning_team", "losing_team",
    "date", "month", "dayofweek"
  )]
)
myData1
## # A tibble: 731 × 14
## year country city stage home_team away_team home_score away_score outcome
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 1930 Uruguay Montev… Grou… France Mexico 4 1 H
## 2 1930 Uruguay Montev… Grou… Belgium United S… 0 3 A
## 3 1930 Uruguay Montev… Grou… Brazil Yugoslav… 1 2 A
## 4 1930 Uruguay Montev… Grou… Peru Romania 1 3 A
## 5 1930 Uruguay Montev… Grou… Argentina France 1 0 H
## 6 1930 Uruguay Montev… Grou… Chile Mexico 3 0 H
## 7 1930 Uruguay Montev… Grou… Bolivia Yugoslav… 0 4 A
## 8 1930 Uruguay Montev… Grou… Paraguay United S… 0 3 A
## 9 1930 Uruguay Montev… Grou… Uruguay Peru 1 0 H
## 10 1930 Uruguay Montev… Grou… Argentina Mexico 6 3 H
## # ℹ 721 more rows
## # ℹ 5 more variables: winning_team <chr>, losing_team <chr>, date <date>,
## # month <chr>, dayofweek <chr>
# Numeric-only subset (year and both scores) with incomplete rows removed.
myData55 <- na.omit(
  myData[, c("year", "home_score", "away_score")]
)
myData55
## # A tibble: 900 × 3
## year home_score away_score
## <dbl> <dbl> <dbl>
## 1 1930 4 1
## 2 1930 0 3
## 3 1930 1 2
## 4 1930 1 3
## 5 1930 1 0
## 6 1930 3 0
## 7 1930 0 4
## 8 1930 0 3
## 9 1930 1 0
## 10 1930 6 3
## # ℹ 890 more rows
# Multiply a value by a multiplier.
#
# x:      value(s) to scale.
# factor: multiplier applied to `x` with `*`.
# Returns `x * factor`.
double_by_vector <- function(x, factor) {
  x * factor
}
# Quick checks of the helper on two scalars.
10 %>%
  double_by_vector(factor = 2)
## [1] 20
100 %>%
  double_by_vector(factor = 2)
## [1] 200
# Multiply every column of myData55 by 10 and collect the results in a tibble.
myData55 %>%
  map_dfr(.f = double_by_vector, factor = 10)
## # A tibble: 900 × 3
## year home_score away_score
## <dbl> <dbl> <dbl>
## 1 19300 40 10
## 2 19300 0 30
## 3 19300 10 20
## 4 19300 10 30
## 5 19300 10 0
## 6 19300 30 0
## 7 19300 0 40
## 8 19300 0 30
## 9 19300 10 0
## 10 19300 60 30
## # ℹ 890 more rows
# Home matches of three South American sides, reshaped so the team name is in
# `hometeam`; `home_or_away` holds the pivoted column's name ("home_team").
myData4 <- myData1 %>%
  filter(home_team %in% c("Brazil", "Uruguay", "Argentina")) %>%
  select(home_team, home_score) %>%
  pivot_longer(
    cols = "home_team",
    names_to = "home_or_away",
    values_to = "hometeam"
  )
# Total and standard deviation of home goals for each team, then print them.
standard_deviation <- myData4 %>%
  group_by(hometeam) %>%
  summarise(
    total_home_score = sum(home_score),
    stddev_home_score = sd(home_score)
  )
print(standard_deviation)
## # A tibble: 3 × 3
## hometeam total_home_score stddev_home_score
## <chr> <dbl> <dbl>
## 1 Argentina 110 1.63
## 2 Brazil 170 1.51
## 3 Uruguay 30 1.59
# Per-team summary of home goals. The match count is captured here, while the
# data are still grouped, because the confidence interval below needs it.
myData5 <- myData4 %>%
  group_by(hometeam) %>%
  summarise(
    total_home_score = sum(home_score),
    mean_home_score = mean(home_score),
    stddev_home_score = sd(home_score),
    n_matches = n()
  )

# 95% t-based confidence interval for each team's mean home score.
# n() must not be called here: myData5 has one row per team, so n() would be
# the number of teams (3) rather than the number of matches behind each mean,
# giving every team the same df and the same sqrt(n) divisor.
myData6 <- myData5 %>%
  mutate(
    ci_margin = qt(1 - 0.05 / 2, df = n_matches - 1) *
      (stddev_home_score / sqrt(n_matches)),
    conf_int_lower = mean_home_score - ci_margin,
    conf_int_upper = mean_home_score + ci_margin
  ) %>%
  # The margin is only an intermediate; drop it from the result.
  select(-ci_margin)
# Dot-and-whisker plot: mean home goals per team with its confidence interval.
myData6 %>%
  ggplot(aes(x = mean_home_score, y = hometeam)) +
  geom_point() +
  # Horizontal error bars size their end caps with `height`; `width` is not a
  # parameter of geom_errorbarh() and was ignored with a warning.
  geom_errorbarh(
    aes(xmin = conf_int_lower, xmax = conf_int_upper),
    height = 0.4
  ) +
  labs(
    x = "Goals",
    y = "Team",
    title = "Mean Home Scores with Confidence Intervals"
  )