# Load the "in" sheet of the workbook. `read_excel()` already returns a
# tibble, so no separate `as_tibble()` conversion is needed.
# `na = c("", "NA")` makes cells holding the literal text "NA" missing values;
# readxl's default only treats blank cells as missing.
# NOTE(review): the coercion warning raised later for `rank_2003` suggests the
# sheet stores text placeholders in otherwise numeric columns -- confirm that
# the placeholder is "NA" against the workbook.
salaries <- read_excel(
  "../00_data/my.Data.xlsx",
  sheet = "in",
  na = c("", "NA")
)
## New names:
## • `` -> `...1`
# Case of numeric variables
# Mean of every numeric column, ignoring missing values
salaries %>%
  select(where(is.numeric)) %>%
  map_dbl(mean, na.rm = TRUE)
## ...1 differential release_year
## 346.00000 -12.32272 1982.87265
## peak_billboard_position
## 61.19392
# Trimmed mean of every numeric column: drop 10% of the observations from
# each end before averaging, ignoring missing values
salaries %>%
  select(where(is.numeric)) %>%
  map_dbl(mean, trim = 0.1, na.rm = TRUE)
## ...1 differential release_year
## 346.00000 -10.66727 1981.85353
## peak_billboard_position
## 51.26040
# Mean of a single numeric column (`differential`), returned as a named
# numeric vector of length one
salaries %>%
  select(differential) %>%
  map_dbl(~ mean(.x, na.rm = TRUE))
## differential
## -12.32272
# Create your own function
# Coerce both rank columns to numeric. Entries that cannot be parsed as a
# number become NA, which R reports with a coercion warning.
salaries <- salaries %>%
  mutate(across(c(rank_2003, rank_2020), as.numeric))
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `rank_2003 = as.numeric(rank_2003)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
#' Multiply a numeric vector by a factor
#'
#' Despite its name, the function scales by whatever multiplier is supplied,
#' not only by two.
#'
#' @param x A numeric vector. `NA` values propagate to the result.
#' @param factor A numeric multiplier, recycled against `x` by the usual
#'   arithmetic rules.
#' @return A numeric vector holding `x * factor`.
double_by_factor <- function(x, factor) {
  # Fail early with a clear message instead of an opaque arithmetic error
  if (!is.numeric(x)) {
    stop("Input must be numeric", call. = FALSE)
  }
  if (!is.numeric(factor)) {
    stop("`factor` must be numeric", call. = FALSE)
  }
  x * factor
}
# Scale both rank columns by 10. `across()` applies the helper column by
# column and keeps the tibble shape; it replaces the superseded `map_dfr()`,
# which only produced columns here because `bind_rows()` treated the mapped
# vectors as columns.
salaries %>%
  select(rank_2003, rank_2020) %>%
  mutate(across(everything(), ~ double_by_factor(.x, factor = 10)))
## # A tibble: 691 × 2
## rank_2003 rank_2020
## <dbl> <dbl>
## 1 1000 2820
## 2 2140 4550
## 3 550 3320
## 4 3060 NA
## 5 500 2270
## 6 NA 320
## 7 NA 330
## 8 4210 NA
## 9 NA 680
## 10 120 310
## # ℹ 681 more rows
# When you have a grouping variable (factor)
# Simple linear regression of `differential` on `rank_2020` over all rows;
# the `.` placeholder routes the piped tibble into `data`
salaries %>%
  lm(differential ~ rank_2020, data = .)
##
## Call:
## lm(formula = differential ~ rank_2020, data = .)
##
## Coefficients:
## (Intercept) rank_2020
## 156.8727 -0.3846
# List the unique values of `genre` (including NA, if present)
distinct(salaries, genre)
## # A tibble: 17 × 1
## genre
## <chr>
## 1 Big Band/Jazz
## 2 Rock n' Roll/Rhythm & Blues
## 3 NA
## 4 Soul/Gospel/R&B
## 5 Hip-Hop/Rap
## 6 Blues/Blues Rock
## 7 Country/Folk/Country Rock/Folk Rock
## 8 Indie/Alternative Rock
## 9 Punk/Post-Punk/New Wave/Power Pop
## 10 Electronic
## 11 Funk/Disco
## 12 Latin
## 13 Hard Rock/Metal
## 14 Singer-Songwriter/Heartland Rock
## 15 Blues/Blues ROck
## 16 Reggae
## 17 Afrobeat
# Fit `differential ~ rank_2020` separately within quintiles of `rank_2003`.
# Splitting on the raw `rank_2003` values leaves groups too small to fit: the
# rendered output shows a `qt()` NaN warning per model and every point
# dropped from the coefficient plot. Binning the rank keeps the grouping idea
# while leaving enough rows per model for a slope and confidence interval.
reg_coeff_tbl <- salaries %>%
  # Keep only complete cases for the variables used below
  filter(!is.na(rank_2003), !is.na(rank_2020), !is.na(differential)) %>%
  # Five roughly equal-sized bins; bin 1 holds the lowest `rank_2003` values
  mutate(rank_bin = ntile(rank_2003, 5)) %>%
  # `split()` returns a named list, so `.id` below carries the bin label
  split(.$rank_bin) %>%
  # One regression per bin
  map(~ lm(differential ~ rank_2020, data = .x)) %>%
  # Coefficient table with confidence intervals for each model
  map(broom::tidy, conf.int = TRUE) %>%
  bind_rows(.id = "rank_group") %>%
  # Keep only the slope on `rank_2020`
  filter(term == "rank_2020")
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
# Visualize the sign-flipped regression coefficients per rank group.
# Negating an interval reverses its bounds: the flipped lower bound is
# `-conf.high` and the flipped upper bound is `-conf.low`. Negating each
# bound in place would leave the lower bound above the upper bound.
reg_coeff_tbl %>%
  mutate(
    estimate = -estimate,
    ci_lower = -conf.high,
    ci_upper = -conf.low
  ) %>%
  ggplot(aes(x = estimate, y = rank_group)) +
  geom_point() +
  geom_errorbar(aes(xmin = ci_lower, xmax = ci_upper)) +
  labs(
    title = "Regression Coefficients by Rank Group",
    x = "Coefficient Estimate",
    y = "Rank Group"
  )
## Warning: Removed 324 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Choose one of the two cases above and apply it to your data
# Fit `differential ~ rank_2020` separately for each genre and keep the slope.
genre_coeff_tbl <- salaries %>%
  # Keep only complete cases for the variables used below
  filter(!is.na(genre), !is.na(rank_2020), !is.na(differential)) %>%
  # Merge the mis-capitalised duplicate label into its intended genre
  mutate(
    genre = if_else(genre == "Blues/Blues ROck", "Blues/Blues Rock", genre)
  ) %>%
  # A slope with a confidence interval needs at least three observations;
  # smaller groups only yield perfect fits with undefined intervals
  add_count(genre, name = "n_obs") %>%
  filter(n_obs >= 3) %>%
  # `split()` returns a list named by genre, so `.id` below stores the genre
  # name rather than a positional index
  split(.$genre) %>%
  # One regression per genre
  map(~ lm(differential ~ rank_2020, data = .x)) %>%
  # Coefficient table with confidence intervals for each model
  map(broom::tidy, conf.int = TRUE) %>%
  bind_rows(.id = "genre_group") %>%
  # Keep only the slope on `rank_2020`
  filter(term == "rank_2020")
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in qt(a, object$df.residual): NaNs produced
## Warning in summary.lm(x): essentially perfect fit: summary may be unreliable
## Warning in summary.lm(object, ...): essentially perfect fit: summary may be
## unreliable
# Plot the per-genre slope estimates with their confidence intervals
ggplot(
  data = genre_coeff_tbl,
  mapping = aes(x = estimate, y = genre_group)
) +
  geom_point() +
  geom_errorbar(mapping = aes(xmin = conf.low, xmax = conf.high)) +
  labs(
    title = "Regression Coefficients by Genre",
    x = "Coefficient Estimate",
    y = "Genre"
  )
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).