Module 13: Apply it to your data 12

Import your data

# Load the mtcars example data set into the workspace
data("mtcars")

# Import the project data as a tibble; the path is relative to the working directory
Mydata <- read_csv("../00_data/tdf_winners.csv")

## Rows: 106 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): winner_name, winner_team, full_name, nickname, birth_town, birth_c...
## dbl  (9): edition, distance, time_overall, time_margin, stage_wins, stages_l...
## date (3): start_date, born, died
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Print the imported tibble to check its dimensions and column types
Mydata

## # A tibble: 106 × 19
##    edition start_date winner_name  winner_team distance time_overall time_margin
##      <dbl> <date>     <chr>        <chr>          <dbl>        <dbl>       <dbl>
##  1       1 1903-07-01 Maurice Gar… La Françai…     2428         94.6        2.99
##  2       2 1904-07-02 Henri Cornet Conte           2428         96.1        2.27
##  3       3 1905-07-09 Louis Trous… Peugeot–Wo…     2994         NA         NA   
##  4       4 1906-07-04 René Pottier Peugeot–Wo…     4637         NA         NA   
##  5       5 1907-07-08 Lucien Peti… Peugeot–Wo…     4488         NA         NA   
##  6       6 1908-07-13 Lucien Peti… Peugeot–Wo…     4497         NA         NA   
##  7       7 1909-07-05 François Fa… Alcyon–Dun…     4498         NA         NA   
##  8       8 1910-07-01 Octave Lapi… Alcyon–Dun…     4734         NA         NA   
##  9       9 1911-07-02 Gustave Gar… Alcyon–Dun…     5343         NA         NA   
## 10      10 1912-06-30 Odile Defra… Alcyon–Dun…     5289         NA         NA   
## # ℹ 96 more rows
## # ℹ 12 more variables: stage_wins <dbl>, stages_led <dbl>, height <dbl>,
## #   weight <dbl>, age <dbl>, born <date>, died <date>, full_name <chr>,
## #   nickname <chr>, birth_town <chr>, birth_country <chr>, nationality <chr>

Repeat the same operation over different columns of a data frame

Case of numeric variables

# Most explicit form: the piped data is named as .x and a formula lambda is named as .f
mtcars %>% map_dbl(.x = ., .f = ~mean(x = .x))

##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500

# .x is left out: the pipe supplies mtcars as the first argument of map_dbl()
mtcars %>% map_dbl(.f = ~mean(x = .x))

##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500

# The function mean is passed directly as .f instead of a formula lambda
mtcars %>% map_dbl(.f = mean)

##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500

# Shortest form: mean is matched to .f by position
mtcars %>% map_dbl(mean)

##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500

# Adding an argument: trim = 0.1 is given to mean() inside the formula lambda
mtcars %>% map_dbl(.x = ., .f = ~mean(x = .x, trim = 0.1))

##         mpg         cyl        disp          hp        drat          wt 
##  19.6961538   6.2307692 222.5230769 141.1923077   3.5792308   3.1526923 
##        qsec          vs          am        gear        carb 
##  17.8276923   0.4230769   0.3846154   3.6153846   2.6538462

# Trimmed mean without a lambda: trim = 0.1 is forwarded by map_dbl() to mean()
mtcars %>% map_dbl(mean, trim = 0.1)

##         mpg         cyl        disp          hp        drat          wt 
##  19.6961538   6.2307692 222.5230769 141.1923077   3.5792308   3.1526923 
##        qsec          vs          am        gear        carb 
##  17.8276923   0.4230769   0.3846154   3.6153846   2.6538462

# select() with the piped data named explicitly as .data
mtcars %>% select(.data = ., mpg)

##                      mpg
## Mazda RX4           21.0
## Mazda RX4 Wag       21.0
## Datsun 710          22.8
## Hornet 4 Drive      21.4
## Hornet Sportabout   18.7
## Valiant             18.1
## Duster 360          14.3
## Merc 240D           24.4
## Merc 230            22.8
## Merc 280            19.2
## Merc 280C           17.8
## Merc 450SE          16.4
## Merc 450SL          17.3
## Merc 450SLC         15.2
## Cadillac Fleetwood  10.4
## Lincoln Continental 10.4
## Chrysler Imperial   14.7
## Fiat 128            32.4
## Honda Civic         30.4
## Toyota Corolla      33.9
## Toyota Corona       21.5
## Dodge Challenger    15.5
## AMC Javelin         15.2
## Camaro Z28          13.3
## Pontiac Firebird    19.2
## Fiat X1-9           27.3
## Porsche 914-2       26.0
## Lotus Europa        30.4
## Ford Pantera L      15.8
## Ferrari Dino        19.7
## Maserati Bora       15.0
## Volvo 142E          21.4

# select() in its usual short form: the pipe supplies the data as the first argument
mtcars %>% select(mpg)

##                      mpg
## Mazda RX4           21.0
## Mazda RX4 Wag       21.0
## Datsun 710          22.8
## Hornet 4 Drive      21.4
## Hornet Sportabout   18.7
## Valiant             18.1
## Duster 360          14.3
## Merc 240D           24.4
## Merc 230            22.8
## Merc 280            19.2
## Merc 280C           17.8
## Merc 450SE          16.4
## Merc 450SL          17.3
## Merc 450SLC         15.2
## Cadillac Fleetwood  10.4
## Lincoln Continental 10.4
## Chrysler Imperial   14.7
## Fiat 128            32.4
## Honda Civic         30.4
## Toyota Corolla      33.9
## Toyota Corona       21.5
## Dodge Challenger    15.5
## AMC Javelin         15.2
## Camaro Z28          13.3
## Pontiac Firebird    19.2
## Fiat X1-9           27.3
## Porsche 914-2       26.0
## Lotus Europa        30.4
## Ford Pantera L      15.8
## Ferrari Dino        19.7
## Maserati Bora       15.0
## Volvo 142E          21.4

Create your own function

# Checking correlation between columns
#
# Computes the correlation between two columns of a data frame.
#
# data: a data frame (or list) that holds the columns.
# col1, col2: the columns to correlate, each given as a single column name
#   (character string) or a numeric position.
# ...: further arguments forwarded to cor(), e.g. use = "complete.obs" to
#   ignore missing values or method = "spearman".
#
# Returns the value of cor() for the two columns. Stops with an error that
# names the offending column(s) when a requested column name is not in `data`.
my_correlation <- function(data, col1, col2, ...) {
  # Only names can be checked against names(data); numeric positions are
  # left to `[[` so that index-based calls keep working.
  requested <- unlist(Filter(is.character, list(col1, col2)))
  missing_cols <- setdiff(requested, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in data: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  cor(data[[col1]], data[[col2]], ...)
}

# mpg and cyl are negatively correlated: as the number of cylinders increases, the car becomes less fuel efficient
my_correlation(mtcars, "mpg", "cyl")

## [1] -0.852162

Repeat the same operation over different elements of a list

When you have a grouping variable (factor)

# Regress mpg on wt using all rows of mtcars; formula is named so the piped data can be placed in data = .
mtcars %>% lm(formula = mpg ~ wt, data = .)

## 
## Call:
## lm(formula = mpg ~ wt, data = .)
## 
## Coefficients:
## (Intercept)           wt  
##      37.285       -5.344

# List the distinct values of cyl, the variable used to split the data below
mtcars %>% distinct(cyl)

##                   cyl
## Mazda RX4           6
## Datsun 710          4
## Hornet Sportabout   8

# Per-group regression of mpg on wt, one model for each value of cyl.
# The result keeps one row per group: the wt coefficient with its
# confidence interval.
reg_coeff_tbl <- mtcars %>%
  # One data frame per distinct value of cyl
  split(.$cyl) %>%
  # Fit the same regression within each group
  map(~ lm(formula = mpg ~ wt, data = .x)) %>%
  # Turn each fitted model into a coefficient table with confidence intervals
  map(~ broom::tidy(.x, conf.int = TRUE)) %>%
  # Stack the tables, storing the list names in a cyl column
  bind_rows(.id = "cyl") %>%
  # Filter for the wt coefficients
  filter(term == "wt")

# Plot the wt coefficient of each cyl group as a point with a horizontal
# error bar spanning its confidence interval.
# The estimate and both interval bounds have their sign flipped first.
# NOTE(review): presumably so the effect reads as mpg lost per unit of wt;
# after the flip conf.low is the larger bound - confirm this is intended.
reg_coeff_tbl %>%
  mutate(across(c(estimate, conf.low, conf.high), ~ -.x)) %>%
  ggplot(aes(x = estimate, y = cyl)) +
  geom_point() +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high))

Create your own

Choose one of the two cases above and apply it to your data.

# Keep only the four columns that are rescaled in this example
Mydata_select <- Mydata %>%
  select(stage_wins, stages_led, age, distance)

# Multiply x by factor.
#
# x: the values to scale.
# factor: the multiplier applied to x.
#
# Returns the product x * factor.
double_by_vector <- function(x, factor) {
  scaled <- x * factor
  scaled
}


# Apply double_by_vector() with factor = 10 to every column of Mydata_select.
# modify() replaces each column with the function's result and returns an
# object of the same type as its input, so the tibble shape is kept.
# It is used instead of map_dfr(), which is superseded as of purrr 1.0.0 and
# is meant for row-binding data frames rather than transforming columns.
Mydata_select %>% modify(double_by_vector, factor = 10)

## # A tibble: 106 × 4
##    stage_wins stages_led   age distance
##         <dbl>      <dbl> <dbl>    <dbl>
##  1         30         60   320    24280
##  2         10         30   190    24280
##  3         50        100   240    29940
##  4         50        120   270    46370
##  5         20         50   240    44880
##  6         50        130   250    44970
##  7         60        130   220    44980
##  8         40         30   220    47340
##  9         20        130   260    53430
## 10         30        130   230    52890
## # ℹ 96 more rows

Module 13: Apply it to your data 12

Chapter 21 Iteration

Angus Somers

Import your data

Repeat the same operation over different columns of a data frame

Repeat the same operation over different elements of a list

Create your own