# Load the mtcars example dataset into the workspace
data("mtcars")
# Read the winners CSV from a path relative to the working directory
# NOTE(review): read_csv() is presumably readr's; no library() call is visible
# in this chunk -- confirm the package is attached earlier
Mydata <- read_csv("../00_data/tdf_winners.csv")
## Rows: 106 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): winner_name, winner_team, full_name, nickname, birth_town, birth_c...
## dbl (9): edition, distance, time_overall, time_margin, stage_wins, stages_l...
## date (3): start_date, born, died
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the imported data to inspect its columns and first rows
Mydata
## # A tibble: 106 × 19
## edition start_date winner_name winner_team distance time_overall time_margin
## <dbl> <date> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 1903-07-01 Maurice Gar… La Françai… 2428 94.6 2.99
## 2 2 1904-07-02 Henri Cornet Conte 2428 96.1 2.27
## 3 3 1905-07-09 Louis Trous… Peugeot–Wo… 2994 NA NA
## 4 4 1906-07-04 René Pottier Peugeot–Wo… 4637 NA NA
## 5 5 1907-07-08 Lucien Peti… Peugeot–Wo… 4488 NA NA
## 6 6 1908-07-13 Lucien Peti… Peugeot–Wo… 4497 NA NA
## 7 7 1909-07-05 François Fa… Alcyon–Dun… 4498 NA NA
## 8 8 1910-07-01 Octave Lapi… Alcyon–Dun… 4734 NA NA
## 9 9 1911-07-02 Gustave Gar… Alcyon–Dun… 5343 NA NA
## 10 10 1912-06-30 Odile Defra… Alcyon–Dun… 5289 NA NA
## # ℹ 96 more rows
## # ℹ 12 more variables: stage_wins <dbl>, stages_led <dbl>, height <dbl>,
## # weight <dbl>, age <dbl>, born <date>, died <date>, full_name <chr>,
## # nickname <chr>, birth_town <chr>, birth_country <chr>, nationality <chr>
Case of numeric variables
# Column means, most explicit form: the dot passes mtcars as .x and a formula
# lambda is supplied as .f
mtcars %>% map_dbl(.x = ., .f = ~mean(x = .x))
## mpg cyl disp hp drat wt qsec
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
## vs am gear carb
## 0.437500 0.406250 3.687500 2.812500
# Same result: the pipe already supplies mtcars as the first argument, so
# `.x = .` can be dropped
mtcars %>% map_dbl(.f = ~mean(x = .x))
## mpg cyl disp hp drat wt qsec
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
## vs am gear carb
## 0.437500 0.406250 3.687500 2.812500
# Same result: mean is passed directly instead of being wrapped in a formula
mtcars %>% map_dbl(.f = mean)
## mpg cyl disp hp drat wt qsec
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
## vs am gear carb
## 0.437500 0.406250 3.687500 2.812500
# Same result, shortest form: the function is matched by position
mtcars %>% map_dbl(mean)
## mpg cyl disp hp drat wt qsec
## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 17.848750
## vs am gear carb
## 0.437500 0.406250 3.687500 2.812500
# Adding an argument: trim = 0.1 is set inside the formula lambda, giving
# trimmed column means
mtcars %>% map_dbl(.x = ., .f = ~mean(x = .x, trim = 0.1))
## mpg cyl disp hp drat wt
## 19.6961538 6.2307692 222.5230769 141.1923077 3.5792308 3.1526923
## qsec vs am gear carb
## 17.8276923 0.4230769 0.3846154 3.6153846 2.6538462
# Same trimmed means: trim = 0.1 is given to map_dbl(), which forwards it to
# mean()
mtcars %>% map_dbl(mean, trim = 0.1)
## mpg cyl disp hp drat wt
## 19.6961538 6.2307692 222.5230769 141.1923077 3.5792308 3.1526923
## qsec vs am gear carb
## 17.8276923 0.4230769 0.3846154 3.6153846 2.6538462
# Select the mpg column, passing the data explicitly through the dot placeholder
mtcars %>% select(.data = ., mpg)
## mpg
## Mazda RX4 21.0
## Mazda RX4 Wag 21.0
## Datsun 710 22.8
## Hornet 4 Drive 21.4
## Hornet Sportabout 18.7
## Valiant 18.1
## Duster 360 14.3
## Merc 240D 24.4
## Merc 230 22.8
## Merc 280 19.2
## Merc 280C 17.8
## Merc 450SE 16.4
## Merc 450SL 17.3
## Merc 450SLC 15.2
## Cadillac Fleetwood 10.4
## Lincoln Continental 10.4
## Chrysler Imperial 14.7
## Fiat 128 32.4
## Honda Civic 30.4
## Toyota Corolla 33.9
## Toyota Corona 21.5
## Dodge Challenger 15.5
## AMC Javelin 15.2
## Camaro Z28 13.3
## Pontiac Firebird 19.2
## Fiat X1-9 27.3
## Porsche 914-2 26.0
## Lotus Europa 30.4
## Ford Pantera L 15.8
## Ferrari Dino 19.7
## Maserati Bora 15.0
## Volvo 142E 21.4
# Same selection: the pipe supplies mtcars as the first argument
mtcars %>% select(mpg)
## mpg
## Mazda RX4 21.0
## Mazda RX4 Wag 21.0
## Datsun 710 22.8
## Hornet 4 Drive 21.4
## Hornet Sportabout 18.7
## Valiant 18.1
## Duster 360 14.3
## Merc 240D 24.4
## Merc 230 22.8
## Merc 280 19.2
## Merc 280C 17.8
## Merc 450SE 16.4
## Merc 450SL 17.3
## Merc 450SLC 15.2
## Cadillac Fleetwood 10.4
## Lincoln Continental 10.4
## Chrysler Imperial 14.7
## Fiat 128 32.4
## Honda Civic 30.4
## Toyota Corolla 33.9
## Toyota Corona 21.5
## Dodge Challenger 15.5
## AMC Javelin 15.2
## Camaro Z28 13.3
## Pontiac Firebird 19.2
## Fiat X1-9 27.3
## Porsche 914-2 26.0
## Lotus Europa 30.4
## Ford Pantera L 15.8
## Ferrari Dino 19.7
## Maserati Bora 15.0
## Volvo 142E 21.4
Create your own function
# Checking correlation between columns
#' Correlation between two columns of a data frame
#'
#' @param data A data frame (or list) holding the columns.
#' @param col1,col2 Column names (or positions) used with `[[` to extract the
#'   two vectors.
#' @param ... Further arguments forwarded to `stats::cor()`, for example
#'   `use = "complete.obs"` to ignore missing values or
#'   `method = "spearman"`.
#' @return The value returned by `stats::cor()` for the two columns.
my_correlation <- function(data, col1, col2, ...) {
  x <- data[[col1]]
  y <- data[[col2]]

  # `[[` yields NULL for an unknown name; fail with a message that names the
  # missing column instead of letting cor() raise a generic error.
  not_found <- c(is.null(x), is.null(y))
  if (any(not_found)) {
    stop(
      "Column(s) not found in `data`: ",
      paste(c(col1, col2)[not_found], collapse = ", "),
      call. = FALSE
    )
  }

  stats::cor(x, y, ...)
}
# mpg and cyl are negatively correlated: as the number of cylinders increases,
# the car becomes less fuel efficient
my_correlation(mtcars, "mpg", "cyl")
## [1] -0.852162
When you have a grouping variable (factor)
# Single regression of mpg on wt using all cars; the dot routes mtcars to lm()'s
# data argument because data is not lm()'s first parameter
mtcars %>% lm(formula = mpg ~ wt, data = .)
##
## Call:
## lm(formula = mpg ~ wt, data = .)
##
## Coefficients:
## (Intercept) wt
## 37.285 -5.344
# List the distinct values of cyl, the grouping variable
mtcars %>% distinct(cyl)
## cyl
## Mazda RX4 6
## Datsun 710 4
## Hornet Sportabout 8
# Fit mpg ~ wt separately for each cyl group and collect the wt slope of every
# group, together with its confidence interval, in one table.
reg_coeff_tbl <- mtcars %>%
  # One data frame per cyl value, named by that value
  split(.$cyl) %>%
  # Fit the regression on a group and tidy its coefficients in a single step
  map(function(cyl_group) {
    fit <- lm(formula = mpg ~ wt, data = cyl_group)
    broom::tidy(fit, conf.int = TRUE)
  }) %>%
  # Stack the per-group results; the list names become the cyl column
  bind_rows(.id = "cyl") %>%
  # Keep only the wt coefficient rows
  filter(term == "wt")
# Plot the size of the wt effect per cyl group with its confidence interval.
# The slope is negated so the effect is shown as a positive number. Negating an
# interval reverses its ends, so the new lower bound is -conf.high and the new
# upper bound is -conf.low; this keeps xmin <= xmax.
reg_coeff_tbl %>%
  mutate(
    estimate = -estimate,
    flipped_low = -conf.high,
    flipped_high = -conf.low
  ) %>%
  ggplot(aes(x = estimate, y = cyl)) +
  geom_point() +
  geom_errorbar(aes(xmin = flipped_low, xmax = flipped_high))
Choose either one of the two cases above and apply it to your data
# Keep four numeric columns to work with
Mydata_select <- Mydata %>%
  select(stage_wins, stages_led, age, distance)
#' Multiply a vector by a factor
#'
#' @param x A vector to scale.
#' @param factor The multiplier applied to `x`.
#' @return `x * factor`.
double_by_vector <- function(x, factor) {
  scaled <- x * factor
  scaled
}
# Multiply every column by 10. modify() applies the function to each column and
# returns an object of the same type as its input, so the result is still a
# tibble; it replaces the superseded map_dfr(), which is meant for row-binding.
Mydata_select %>% modify(double_by_vector, factor = 10)
## # A tibble: 106 × 4
## stage_wins stages_led age distance
## <dbl> <dbl> <dbl> <dbl>
## 1 30 60 320 24280
## 2 10 30 190 24280
## 3 50 100 240 29940
## 4 50 120 270 46370
## 5 20 50 240 44880
## 6 50 130 250 44970
## 7 60 130 220 44980
## 8 40 30 220 47340
## 9 20 130 260 53430
## 10 30 130 230 52890
## # ℹ 96 more rows