# Build a long-format table of tour grosses from `raw_data`.
#
# Steps:
#   1. Normalise column names (the renames below rely on the resulting
#      snake_case names) and give the money columns explicit unit suffixes.
#   2. Parse the currency-formatted columns to numeric, extract the first
#      four-digit year from `years`, and strip annotation symbols from titles.
#   3. Pivot the two gross columns into one `valuation_type` / `gross_amount`
#      pair, so each tour yields one row per valuation.
#   4. Keep only the analysis columns and drop rows with no parsed amount.
tidy_tours <- raw_data |>
  clean_names() |>
  rename(
    actual_gross_usd = actual_gross,
    adjusted_gross_2022_usd = adjusted_gross_in_2022_dollars,
    years = year_s
  ) |>
  mutate(
    across(
      c(actual_gross_usd, adjusted_gross_2022_usd, average_gross),
      \(x) {
        x |>
          as.character() |>
          # Remove whole bracketed markers ("[4]", "[a]") first. Deleting only
          # the bracket characters would leave a numeric marker's digits fused
          # onto the amount ("$1,000[4]" -> 10004).
          str_remove_all("\\[[^\\]]*\\]") |>
          # Then strip currency symbols, thousands separators, stray
          # brackets, lowercase letters and annotation symbols.
          str_remove_all("[\\$,\\[\\]a-z†‡*]") |>
          as.numeric()
      }
    ),
    # Only the leading year is kept; anything after it (e.g. an end year)
    # is ignored.
    start_year = as.numeric(str_extract(years, "^\\d{4}")),
    # NOTE(review): assumes short bracketed tokens such as "[4]" or "[a]" in
    # titles are footnote markers, as in the gross columns -- confirm against
    # the raw data. Longer bracketed text is left untouched.
    tour_title = tour_title |>
      str_remove_all("\\[[0-9a-z]{1,3}\\]") |>
      str_remove_all("[†‡*]") |>
      str_trim()
  ) |>
  pivot_longer(
    cols = c(actual_gross_usd, adjusted_gross_2022_usd),
    names_to = "valuation_type",
    values_to = "gross_amount"
  ) |>
  select(
    artist, tour_title, start_year, shows,
    valuation_type, gross_amount, average_gross
  ) |>
  drop_na(gross_amount)

# Preview the first rows of the tidied table.
head(tidy_tours)