Import your data

data("mtcars")
# My dataset
data <- read_excel("myData_charts.xlsx")
data %>% skimr::skim()
Data summary
Name Piped data
Number of rows 45090
Number of columns 10
_______________________
Column type frequency:
character 1
logical 1
numeric 7
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
stock_symbol 2 1 3 5 0 14 0

Variable type: logical

skim_variable n_missing complete_rate mean count
Column1 45090 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
open 2 1 89.27 101.63 1.08 25.67 47.93 128.66 6.962800e+02 ▇▂▁▁▁
high 2 1 90.37 103.00 1.11 25.93 48.46 129.85 7.009900e+02 ▇▂▁▁▁
low 2 1 88.11 100.12 1.00 25.36 47.47 127.25 6.860900e+02 ▇▂▁▁▁
close 2 1 89.27 101.59 1.05 25.66 47.97 128.64 6.916900e+02 ▇▂▁▁▁
adj_close 2 1 85.21 101.00 1.05 22.08 45.38 113.67 6.916900e+02 ▇▁▁▁▁
volume 2 1 52978130.54 93247295.87 589200.00 9629425.00 26463150.00 58397675.00 1.880998e+09 ▇▁▁▁▁
HPR 1 1 0.00 0.02 -0.20 -0.01 0.00 0.01 2.000000e-01 ▁▁▇▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
date 2 1 2010-01-04 2023-01-24 2016-08-09 3287
# Selecting stocks and their closing prices
selected_stocks <- c("AAPL", "ADBE", "AMZN", "CRM", "CSCO", "GOOGL", "IBM", "INTC", "META", "MSFT", "NFLX", "NVDA", "ORCL", "TSLA")

Repeat the same operation over different columns of a data frame

Case of numeric variables

mtcars %>% map_dbl(.x = ., .f = ~mean(x = .x))
##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500
mtcars %>% map_dbl(.f = ~mean(x = .x))
##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500
mtcars %>% map_dbl(mean)
##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500
# Adding an argument
mtcars %>% map_dbl(.x = ., .f = ~mean(x = .x, trim = 0.1))
##         mpg         cyl        disp          hp        drat          wt 
##  19.6961538   6.2307692 222.5230769 141.1923077   3.5792308   3.1526923 
##        qsec          vs          am        gear        carb 
##  17.8276923   0.4230769   0.3846154   3.6153846   2.6538462
mtcars %>% map_dbl(mean, trim = 0.1)
##         mpg         cyl        disp          hp        drat          wt 
##  19.6961538   6.2307692 222.5230769 141.1923077   3.5792308   3.1526923 
##        qsec          vs          am        gear        carb 
##  17.8276923   0.4230769   0.3846154   3.6153846   2.6538462
mtcars %>% select(.data = ., mpg)
##                      mpg
## Mazda RX4           21.0
## Mazda RX4 Wag       21.0
## Datsun 710          22.8
## Hornet 4 Drive      21.4
## Hornet Sportabout   18.7
## Valiant             18.1
## Duster 360          14.3
## Merc 240D           24.4
## Merc 230            22.8
## Merc 280            19.2
## Merc 280C           17.8
## Merc 450SE          16.4
## Merc 450SL          17.3
## Merc 450SLC         15.2
## Cadillac Fleetwood  10.4
## Lincoln Continental 10.4
## Chrysler Imperial   14.7
## Fiat 128            32.4
## Honda Civic         30.4
## Toyota Corolla      33.9
## Toyota Corona       21.5
## Dodge Challenger    15.5
## AMC Javelin         15.2
## Camaro Z28          13.3
## Pontiac Firebird    19.2
## Fiat X1-9           27.3
## Porsche 914-2       26.0
## Lotus Europa        30.4
## Ford Pantera L      15.8
## Ferrari Dino        19.7
## Maserati Bora       15.0
## Volvo 142E          21.4
mtcars %>% select(mpg)
##                      mpg
## Mazda RX4           21.0
## Mazda RX4 Wag       21.0
## Datsun 710          22.8
## Hornet 4 Drive      21.4
## Hornet Sportabout   18.7
## Valiant             18.1
## Duster 360          14.3
## Merc 240D           24.4
## Merc 230            22.8
## Merc 280            19.2
## Merc 280C           17.8
## Merc 450SE          16.4
## Merc 450SL          17.3
## Merc 450SLC         15.2
## Cadillac Fleetwood  10.4
## Lincoln Continental 10.4
## Chrysler Imperial   14.7
## Fiat 128            32.4
## Honda Civic         30.4
## Toyota Corolla      33.9
## Toyota Corona       21.5
## Dodge Challenger    15.5
## AMC Javelin         15.2
## Camaro Z28          13.3
## Pontiac Firebird    19.2
## Fiat X1-9           27.3
## Porsche 914-2       26.0
## Lotus Europa        30.4
## Ford Pantera L      15.8
## Ferrari Dino        19.7
## Maserati Bora       15.0
## Volvo 142E          21.4

Create your own function

# Double values in columns
double_by_factor <- function(x, factor) {x * factor}
10 %>% double_by_factor(factor = 2)
## [1] 20
mtcars %>% map_dfr(.x = ., .f = ~double_by_factor(x = .x, factor = 10))
## # A tibble: 32 × 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1   210    60  1600  1100  39    26.2  165.     0    10    40    40
##  2   210    60  1600  1100  39    28.8  170.     0    10    40    40
##  3   228    40  1080   930  38.5  23.2  186.    10    10    40    10
##  4   214    60  2580  1100  30.8  32.2  194.    10     0    30    10
##  5   187    80  3600  1750  31.5  34.4  170.     0     0    30    20
##  6   181    60  2250  1050  27.6  34.6  202.    10     0    30    10
##  7   143    80  3600  2450  32.1  35.7  158.     0     0    30    40
##  8   244    40  1467   620  36.9  31.9  200     10     0    40    20
##  9   228    40  1408   950  39.2  31.5  229     10     0    40    20
## 10   192    60  1676  1230  39.2  34.4  183     10     0    40    40
## # ℹ 22 more rows
mtcars %>% map_dfr(double_by_factor, factor = 10)
## # A tibble: 32 × 11
##      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1   210    60  1600  1100  39    26.2  165.     0    10    40    40
##  2   210    60  1600  1100  39    28.8  170.     0    10    40    40
##  3   228    40  1080   930  38.5  23.2  186.    10    10    40    10
##  4   214    60  2580  1100  30.8  32.2  194.    10     0    30    10
##  5   187    80  3600  1750  31.5  34.4  170.     0     0    30    20
##  6   181    60  2250  1050  27.6  34.6  202.    10     0    30    10
##  7   143    80  3600  2450  32.1  35.7  158.     0     0    30    40
##  8   244    40  1467   620  36.9  31.9  200     10     0    40    20
##  9   228    40  1408   950  39.2  31.5  229     10     0    40    20
## 10   192    60  1676  1230  39.2  34.4  183     10     0    40    40
## # ℹ 22 more rows

Repeat the same operation over different elements of a list

When you have a grouping variable (factor)

mtcars %>% lm(formula = mpg ~ wt, data = .)
## 
## Call:
## lm(formula = mpg ~ wt, data = .)
## 
## Coefficients:
## (Intercept)           wt  
##      37.285       -5.344
mtcars %>% distinct(cyl)
##                   cyl
## Mazda RX4           6
## Datsun 710          4
## Hornet Sportabout   8
reg_coeff_tbl <- mtcars %>%
    
    # Split it into a list of data frames
    split(.$cyl) %>%
    
    # Repeat regression over each group
    map(~lm(formula = mpg ~ wt, data = .x)) %>%
    
    # Extract coefficients from regression results
    map(broom::tidy, conf.int = TRUE) %>%
    
    # Convert to tibble
    bind_rows(.id = "cyl") %>%
    
    # Filter for wt coefficients
    filter(term == "wt")
reg_coeff_tbl %>%
    
    mutate(estimate = -estimate, 
           conf.low = -conf.low, 
           conf.high = -conf.high) %>%
    
    ggplot(aes(x = estimate, y = cyl)) +
    geom_point() + 
    geom_errorbar(aes(xmin = conf.low, xmax = conf.high))

Create your own

Choose either one of the two cases above and apply it to your data

# Calculate the mean of numeric columns per stock
data %>% skimr::skim()
Data summary
Name Piped data
Number of rows 45090
Number of columns 10
_______________________
Column type frequency:
character 1
logical 1
numeric 7
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
stock_symbol 2 1 3 5 0 14 0

Variable type: logical

skim_variable n_missing complete_rate mean count
Column1 45090 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
open 2 1 89.27 101.63 1.08 25.67 47.93 128.66 6.962800e+02 ▇▂▁▁▁
high 2 1 90.37 103.00 1.11 25.93 48.46 129.85 7.009900e+02 ▇▂▁▁▁
low 2 1 88.11 100.12 1.00 25.36 47.47 127.25 6.860900e+02 ▇▂▁▁▁
close 2 1 89.27 101.59 1.05 25.66 47.97 128.64 6.916900e+02 ▇▂▁▁▁
adj_close 2 1 85.21 101.00 1.05 22.08 45.38 113.67 6.916900e+02 ▇▁▁▁▁
volume 2 1 52978130.54 93247295.87 589200.00 9629425.00 26463150.00 58397675.00 1.880998e+09 ▇▁▁▁▁
HPR 1 1 0.00 0.02 -0.20 -0.01 0.00 0.01 2.000000e-01 ▁▁▇▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
date 2 1 2010-01-04 2023-01-24 2016-08-09 3287
mean_values_per_stock <- data %>%
    group_by(stock_symbol) %>%
    summarise(across(where(is.numeric), mean, na.rm = TRUE))
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(where(is.numeric), mean, na.rm = TRUE)`.
## ℹ In group 1: `stock_symbol = "AAPL"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))
mean_values_per_stock
## # A tibble: 15 × 8
##    stock_symbol  open  high   low close adj_close     volume         HPR
##    <chr>        <dbl> <dbl> <dbl> <dbl>     <dbl>      <dbl>       <dbl>
##  1 AAPL          51.3  51.8  50.7  51.3      49.4 256325536.  0.000270  
##  2 ADBE         186.  188.  184.  186.      186.    3814337.  0.000549  
##  3 AMZN          58.9  59.6  58.2  58.9      58.9  88339985.  0.000103  
##  4 CRM          103.  105.  102.  103.      103.    6910973.  0.000317  
##  5 CSCO          33.5  33.8  33.2  33.5      28.6  32696560.  0.000308  
##  6 GOOGL         49.1  49.6  48.6  49.1      49.1  60186469. -0.0000358 
##  7 IBM          148.  150.  147.  148.      113.    5036545.  0.000364  
##  8 INTC          36.5  36.9  36.1  36.5      31.3  36071696.  0.000415  
##  9 META         148.  150.  146.  148.      148.   31178147.  0.000163  
## 10 MSFT         100.  101.   99.0 100.       95.3  38016469.  0.000390  
## 11 NFLX         188.  191.  185.  188.      188.   18414854.  0.000899  
## 12 NVDA          50.6  51.5  49.5  50.6      50.3  50806127.  0.000375  
## 13 ORCL          46.2  46.7  45.8  46.3      42.6  18018559.  0.000545  
## 14 TSLA          58.9  60.2  57.4  58.8      58.8  93516466.  0.00000684
## 15 <NA>         NaN   NaN   NaN   NaN       NaN         NaN   0.000270
# Define a function to calculate average high price per stock symbol
calculate_average_high_price <- function(data, stock_symbol_column, high_price_column) {
    
    # Filter out rows with missing or NaN values in the high price column
    cleaned_data <- data %>%
        filter(!is.na({{ high_price_column }}), !is.nan({{ high_price_column }}))
    
    # Group by stock symbol and summarize to calculate average high price
    average_high_price <- cleaned_data %>%
        group_by({{ stock_symbol_column }}) %>%
        summarise(average_high = mean({{ high_price_column }}, na.rm = TRUE))
    
    return(average_high_price)
}

# Example usage of the function, assuming high price is represented by 'high' column
average_high_price_per_stock <- calculate_average_high_price(data, stock_symbol, high)
average_high_price_per_stock
## # A tibble: 14 × 2
##    stock_symbol average_high
##    <chr>               <dbl>
##  1 AAPL                 51.8
##  2 ADBE                188. 
##  3 AMZN                 59.6
##  4 CRM                 105. 
##  5 CSCO                 33.8
##  6 GOOGL                49.6
##  7 IBM                 150. 
##  8 INTC                 36.9
##  9 META                150. 
## 10 MSFT                101. 
## 11 NFLX                191. 
## 12 NVDA                 51.5
## 13 ORCL                 46.7
## 14 TSLA                 60.2
# Create a bar plot using ggplot with different colors for each stock symbol
ggplot(average_high_price_per_stock, aes(x = stock_symbol, y = average_high, label = round(average_high, 2), fill = stock_symbol)) +
    geom_bar(stat = "identity") +
    geom_text(vjust = -0.5) +
    labs(title = "Average High Price per Stock Symbol",
         x = "Stock Symbol",
         y = "Average High Price") +
    theme_minimal() +
    theme(legend.position = "none")