Module 13: Apply it to your data 12

Import your data

# Load the built-in mtcars data set used by the examples below
data("mtcars")

# Read the Excel workbook and coerce both measure columns to numeric
airlines <- read_excel("../00_data/MyData.xlsx") %>%
    mutate(
        n_events = as.numeric(n_events),
        avail_seat_km_per_week = as.numeric(avail_seat_km_per_week)
    )
airlines

## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## #   ²n_events

# Print the imported tibble again to inspect its columns and types
airlines

## # A tibble: 336 × 6
##      Ref airline               avail_seat_km_per_week year_range type_…¹ n_eve…²
##    <dbl> <chr>                                  <dbl> <chr>      <chr>     <dbl>
##  1    NA Aer Lingus                         320906734 85_99      incide…       2
##  2     2 Aeroflot*                         1197672318 85_99      incide…      76
##  3     3 Aerolineas Argentinas              385803648 85_99      incide…       6
##  4     4 Aeromexico*                        596871813 85_99      incide…       3
##  5     5 Air Canada                        1865253802 85_99      incide…       2
##  6     6 Air France                        3004002661 85_99      incide…      14
##  7     7 Air India*                         869253552 85_99      incide…       2
##  8     8 Air New Zealand*                   710174817 85_99      incide…       3
##  9     9 Alaska Airlines*                   965346773 85_99      incide…       5
## 10    10 Alitalia                           698012498 85_99      incide…       7
## # … with 326 more rows, and abbreviated variable names ¹type_of_event,
## #   ²n_events

Repeat the same operation over different columns of a data frame

Case of numeric variables

# Column-wise mean with every argument spelled out: `.x` is the piped data
# frame and `.f` is a formula-style anonymous function. map_dbl() returns a
# named double vector with one entry per column.
mtcars %>% map_dbl(.x = ., .f = ~mean(x = .x))

##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500

# Same column means via map(), which returns a list with one element per column
mtcars %>% map(.f = ~mean(x = .x))

## $mpg
## [1] 20.09062
## 
## $cyl
## [1] 6.1875
## 
## $disp
## [1] 230.7219
## 
## $hp
## [1] 146.6875
## 
## $drat
## [1] 3.596563
## 
## $wt
## [1] 3.21725
## 
## $qsec
## [1] 17.84875
## 
## $vs
## [1] 0.4375
## 
## $am
## [1] 0.40625
## 
## $gear
## [1] 3.6875
## 
## $carb
## [1] 2.8125

# Shorthand: pass the function itself instead of a formula lambda
mtcars %>% map_dbl(mean)

##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500

# Adding an argument: trim = 0.1 is supplied inside the lambda, directly in
# the call to mean()
mtcars %>% map_dbl(.x = ., .f = ~mean(x = .x, trim = 0.1))

##         mpg         cyl        disp          hp        drat          wt 
##  19.6961538   6.2307692 222.5230769 141.1923077   3.5792308   3.1526923 
##        qsec          vs          am        gear        carb 
##  17.8276923   0.4230769   0.3846154   3.6153846   2.6538462

# Shorthand: arguments given after the function are forwarded to mean(),
# producing the same trimmed means as the explicit form
mtcars %>% map_dbl(mean, trim = 0.1)

##         mpg         cyl        disp          hp        drat          wt 
##  19.6961538   6.2307692 222.5230769 141.1923077   3.5792308   3.1526923 
##        qsec          vs          am        gear        carb 
##  17.8276923   0.4230769   0.3846154   3.6153846   2.6538462

# Select the mpg column, naming the `.data` argument explicitly
mtcars %>% select(.data = .,mpg)

##                      mpg
## Mazda RX4           21.0
## Mazda RX4 Wag       21.0
## Datsun 710          22.8
## Hornet 4 Drive      21.4
## Hornet Sportabout   18.7
## Valiant             18.1
## Duster 360          14.3
## Merc 240D           24.4
## Merc 230            22.8
## Merc 280            19.2
## Merc 280C           17.8
## Merc 450SE          16.4
## Merc 450SL          17.3
## Merc 450SLC         15.2
## Cadillac Fleetwood  10.4
## Lincoln Continental 10.4
## Chrysler Imperial   14.7
## Fiat 128            32.4
## Honda Civic         30.4
## Toyota Corolla      33.9
## Toyota Corona       21.5
## Dodge Challenger    15.5
## AMC Javelin         15.2
## Camaro Z28          13.3
## Pontiac Firebird    19.2
## Fiat X1-9           27.3
## Porsche 914-2       26.0
## Lotus Europa        30.4
## Ford Pantera L      15.8
## Ferrari Dino        19.7
## Maserati Bora       15.0
## Volvo 142E          21.4

# Same selection, relying on the pipe to supply the data frame
mtcars %>% select(mpg)

##                      mpg
## Mazda RX4           21.0
## Mazda RX4 Wag       21.0
## Datsun 710          22.8
## Hornet 4 Drive      21.4
## Hornet Sportabout   18.7
## Valiant             18.1
## Duster 360          14.3
## Merc 240D           24.4
## Merc 230            22.8
## Merc 280            19.2
## Merc 280C           17.8
## Merc 450SE          16.4
## Merc 450SL          17.3
## Merc 450SLC         15.2
## Cadillac Fleetwood  10.4
## Lincoln Continental 10.4
## Chrysler Imperial   14.7
## Fiat 128            32.4
## Honda Civic         30.4
## Toyota Corolla      33.9
## Toyota Corona       21.5
## Dodge Challenger    15.5
## AMC Javelin         15.2
## Camaro Z28          13.3
## Pontiac Firebird    19.2
## Fiat X1-9           27.3
## Porsche 914-2       26.0
## Lotus Europa        30.4
## Ford Pantera L      15.8
## Ferrari Dino        19.7
## Maserati Bora       15.0
## Volvo 142E          21.4

Create your own function

#' Multiply values by a constant factor
#'
#' @param x A numeric vector (or any object that supports `*`).
#' @param factor Multiplier applied to every element of `x`. Defaults to 2,
#'   so calling the function without it doubles `x`, as the name suggests.
#' @return `x * factor`.
double_by_factor <- function(x, factor = 2) {
    x * factor
}

# Check the function on a single value: 10 * 2
10 %>% double_by_factor(factor = 2)

## [1] 20

# Multiply every column by 10 with all arguments named; map_dfr() combines
# the per-column results into a tibble (row names are not kept)
mtcars %>% map_dfr(.x = ., .f = ~double_by_factor(factor = 10, x = .x))

## # A tibble: 32 × 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1   210    60  1600  1100  39    26.2  165.     0    10    40    40
##  2   210    60  1600  1100  39    28.8  170.     0    10    40    40
##  3   228    40  1080   930  38.5  23.2  186.    10    10    40    10
##  4   214    60  2580  1100  30.8  32.2  194.    10     0    30    10
##  5   187    80  3600  1750  31.5  34.4  170.     0     0    30    20
##  6   181    60  2250  1050  27.6  34.6  202.    10     0    30    10
##  7   143    80  3600  2450  32.1  35.7  158.     0     0    30    40
##  8   244    40  1467   620  36.9  31.9  200     10     0    40    20
##  9   228    40  1408   950  39.2  31.5  229     10     0    40    20
## 10   192    60  1676  1230  39.2  34.4  183     10     0    40    40
## # … with 22 more rows

# Shorthand form of the same call: `factor = 10` is forwarded to
# double_by_factor()
mtcars %>% map_dfr(double_by_factor, factor = 10)

## # A tibble: 32 × 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1   210    60  1600  1100  39    26.2  165.     0    10    40    40
##  2   210    60  1600  1100  39    28.8  170.     0    10    40    40
##  3   228    40  1080   930  38.5  23.2  186.    10    10    40    10
##  4   214    60  2580  1100  30.8  32.2  194.    10     0    30    10
##  5   187    80  3600  1750  31.5  34.4  170.     0     0    30    20
##  6   181    60  2250  1050  27.6  34.6  202.    10     0    30    10
##  7   143    80  3600  2450  32.1  35.7  158.     0     0    30    40
##  8   244    40  1467   620  36.9  31.9  200     10     0    40    20
##  9   228    40  1408   950  39.2  31.5  229     10     0    40    20
## 10   192    60  1676  1230  39.2  34.4  183     10     0    40    40
## # … with 22 more rows

Repeat the same operation over different elements of a list

When you have a grouping variable (factor)

# Simple linear regression of mpg on wt using the whole data set
mtcars %>% lm(formula = mpg ~ wt, data = .)

## 
## Call:
## lm(formula = mpg ~ wt, data = .)
## 
## Coefficients:
## (Intercept)           wt  
##      37.285       -5.344

# List the distinct cyl values that define the groups used below
mtcars %>% distinct(cyl)

##                   cyl
## Mazda RX4           6
## Datsun 710          4
## Hornet Sportabout   8

# Fit mpg ~ wt separately for every cyl group and keep the wt slope,
# together with its confidence interval, from each fit
regression_coefficient_tibble <- mtcars %>%
    # One data frame per distinct cyl value
    split(.$cyl) %>%
    # Fit the same linear model within each group
    map(function(cyl_group) lm(formula = mpg ~ wt, data = cyl_group)) %>%
    # Turn each fit into a coefficient table with confidence bounds
    map(function(fit) broom::tidy(fit, conf.int = TRUE)) %>%
    # Stack the tables, recording the group name in `cyl`
    bind_rows(.id = "cyl") %>%
    # Keep only the slope on wt
    filter(term == "wt")

# Plot the wt coefficient of each cyl group with its confidence interval.
# Signs are flipped before plotting (the full-data wt slope above is negative).
regression_coefficient_tibble %>%
    mutate(across(c(estimate, conf.low, conf.high), ~ -.x)) %>%
    ggplot(aes(y = cyl, x = estimate)) +
    geom_point() +
    geom_errorbar(aes(xmax = conf.high, xmin = conf.low))

Create your own

Choose one of the two cases above and apply it to your data.

# Baseline model on the full data set: n_events explained by type_of_event
airlines %>% lm(formula = n_events ~ type_of_event, data = .)

## 
## Call:
## lm(formula = n_events ~ type_of_event, data = .)
## 
## Coefficients:
##             (Intercept)  type_of_eventfatalities   type_of_eventincidents  
##                   1.420                   82.545                    4.232

# List the distinct airlines that serve as the grouping variable below
airlines %>% distinct(airline)

## # A tibble: 56 × 1
##    airline              
##    <chr>                
##  1 Aer Lingus           
##  2 Aeroflot*            
##  3 Aerolineas Argentinas
##  4 Aeromexico*          
##  5 Air Canada           
##  6 Air France           
##  7 Air India*           
##  8 Air New Zealand*     
##  9 Alaska Airlines*     
## 10 Alitalia             
## # … with 46 more rows

# Fit n_events ~ type_of_event separately for every airline and keep the
# fatalities coefficient, with its confidence interval, from each fit.
# NOTE(review): 336 rows over 56 airlines leaves roughly 6 rows per model,
# presumably why summary.lm warns about essentially perfect fits - confirm.
regression_coefficient_tibble <- airlines %>%
    
    # Split to a list of data frames, one per airline
    split(.$airline)%>%

    # Repeat regression over each group
    
    map(~lm(formula = n_events ~ type_of_event, data = .)) %>%
    
    # Extract coefficients (with confidence intervals) from regression results
    map(broom::tidy, conf.int = TRUE) %>%
    
    # Convert to a single tibble, recording the group name in `airline`
    bind_rows(.id = "airline") %>%
    
    # Keep only the type_of_eventfatalities coefficient
    filter(term == "type_of_eventfatalities")

## Warning in summary.lm(x): essentially perfect fit: summary may be unreliable

## Warning in summary.lm(object, ...): essentially perfect fit: summary may be
## unreliable

## Warning in summary.lm(x): essentially perfect fit: summary may be unreliable

## Warning in summary.lm(object, ...): essentially perfect fit: summary may be
## unreliable

# Inspect the per-airline coefficient table
regression_coefficient_tibble

## # A tibble: 56 × 8
##    airline         term   estimate std.er…¹ statistic p.value  conf.low conf.h…²
##    <chr>           <chr>     <dbl>    <dbl>     <dbl>   <dbl>     <dbl>    <dbl>
##  1 Aer Lingus      type…  2.07e-16 8.16e- 1  2.54e-16  1      -2.60e+ 0 2.60e+ 0
##  2 Aeroflot*       type…  1.01e+ 2 3.33e+ 1  3.01e+ 0  0.0570 -5.60e+ 0 2.07e+ 2
##  3 Aerolineas Arg… type… -2.52e-16 2.04e+ 0 -1.23e-16  1      -6.50e+ 0 6.50e+ 0
##  4 Aeromexico*     type…  3.15e+ 1 2.61e+ 1  1.20e+ 0  0.315  -5.17e+ 1 1.15e+ 2
##  5 Air Canada      type…  0        1.28e-17  0         1      -4.07e-17 4.07e-17
##  6 Air France      type…  2.05e+ 2 1.05e+ 2  1.95e+ 0  0.147  -1.30e+ 2 5.40e+ 2
##  7 Air India*      type…  2.42e+ 2 6.98e+ 1  3.47e+ 0  0.0402  2.03e+ 1 4.65e+ 2
##  8 Air New Zealan… type…  3   e+ 0 3   e+ 0  1   e+ 0  0.391  -6.55e+ 0 1.25e+ 1
##  9 Alaska Airline… type…  4.35e+ 1 3.59e+ 1  1.21e+ 0  0.313  -7.08e+ 1 1.58e+ 2
## 10 Alitalia        type…  2.4 e+ 1 2.05e+ 1  1.17e+ 0  0.326  -4.11e+ 1 8.91e+ 1
## # … with 46 more rows, and abbreviated variable names ¹std.error, ²conf.high

# Plot the fatalities coefficient of every airline with its confidence
# interval. The plot is printed rather than assigned, so
# regression_coefficient_tibble keeps holding the coefficient table.
regression_coefficient_tibble %>%

    # NOTE(review): sign flip carried over from the mtcars example above -
    # confirm it is wanted for this data
    mutate(estimate = -estimate,
           conf.low = -conf.low,
           conf.high = -conf.high) %>%

    # One row per airline; `term` is constant after the filter above, so
    # mapping it to y would stack every airline on a single row
    ggplot(aes(x = estimate, y = airline)) +
    geom_point() +
    geom_errorbar(aes(xmin = conf.low, xmax = conf.high))

Module 13: Apply it to your data 12

Chapter 21 Iteration

Reed Wilson

Import your data

Repeat the same operation over different columns of a data frame

Repeat the same operation over different elements of a list

Create your own