# Load the TidyTuesday (2024-11-26) CBP encounter data straight from GitHub.
MyData <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-11-26/cbp_resp.csv"
)
## Rows: 68815 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): month_grouping, month_abbv, component, land_border_region, area_of...
## dbl (2): fiscal_year, encounter_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Transform data: mean encounter_count per land_border_region
# (missing counts are ignored when averaging).
encountercount_by_region <- summarise(
  group_by(MyData, land_border_region),
  avg_encounter_count = mean(encounter_count, na.rm = TRUE)
)

encountercount_by_region
## # A tibble: 3 × 2
## land_border_region avg_encounter_count
## <chr> <dbl>
## 1 Northern Land Border 46.8
## 2 Other 73.2
## 3 Southwest Land Border 276.
# Dot plot of the regional means; regions appear in their default order.
ggplot(
  encountercount_by_region,
  aes(avg_encounter_count, land_border_region)
) +
  geom_point()
Ordered factor levels
# Same dot plot, with the regions on the y-axis sorted by their mean
# encounter count instead of their default order.
ggplot(
  encountercount_by_region,
  aes(
    x = avg_encounter_count,
    y = fct_reorder(land_border_region, avg_encounter_count)
  )
) +
  geom_point() +
  # Axis titles: describe the x values, drop the y title
  labs(x = "Mean Encounter Count at Land Border Region", y = NULL)
Moving a single level to the front
# Dot plot with regions ordered by mean encounter count, then one level
# moved to the front. fct_relevel() only acts on levels that exist in the
# factor; "Don't know" is not a region in this data (it triggered an
# "unknown level" warning and moved nothing), so the catch-all "Other"
# region is moved instead.
encountercount_by_region %>%
  ggplot(aes(
    x = avg_encounter_count,
    y = fct_reorder(.f = land_border_region, .x = avg_encounter_count) %>%
      fct_relevel("Other")
  )) +
  geom_point() +
  # Labeling
  labs(y = NULL, x = "Mean Encounter Count at Land Border Region")
Make two bar charts here: one before ordering and another after.
# Two bar charts of the mean encounter count per region, drawn from the
# summarised table so each bar is one region's mean.

# Before Ordering: regions in their default (alphabetical) level order
encountercount_by_region %>%
  ggplot(aes(x = avg_encounter_count, y = land_border_region)) +
  geom_col() +
  labs(y = NULL, x = "Mean Encounter Count at Land Border Region")

# After Ordering: regions sorted by their mean encounter count.
# The alphabetical order of the three regions already matches ascending
# mean, so sort descending to make the reordering visible.
encountercount_by_region %>%
  ggplot(aes(
    x = avg_encounter_count,
    y = fct_reorder(
      .f = land_border_region,
      .x = avg_encounter_count,
      .desc = TRUE
    )
  )) +
  geom_col() +
  labs(y = NULL, x = "Mean Encounter Count at Land Border Region")
Show examples of three functions: fct_recode(), fct_collapse(), and fct_lump().
# fct_recode(): rename the "APR" level to "April" and show the mapping.
# The filter must target the recoded column: month_abbv itself still holds
# "APR", so comparing it to "April" matches no rows.
MyData %>%
  mutate(month_abbv_rev = fct_recode(month_abbv, "April" = "APR")) %>%
  filter(month_abbv_rev == "April") %>%
  distinct(month_abbv, month_abbv_rev)
## # A tibble: 1 × 2
## month_abbv month_abbv_rev
## <chr> <fct>
## 1 APR April
# fct_collapse(): merge several citizenship levels into one broader group.
# CANADA and MEXICO are combined into "North America"; only the affected
# rows are shown so the effect of the collapse is visible.
MyData %>%
  mutate(
    citizenship_col = fct_collapse(
      citizenship,
      "North America" = c("CANADA", "MEXICO")
    )
  ) %>%
  filter(citizenship_col == "North America") %>%
  distinct(citizenship, citizenship_col)
## # A tibble: 2 × 2
## citizenship citizenship_col
## <chr> <fct>
## 1 CANADA North America
## 2 MEXICO North America
# Number of rows per citizenship value
count(MyData, citizenship)
## # A tibble: 22 × 2
## citizenship n
## <chr> <int>
## 1 BRAZIL 3040
## 2 CANADA 2527
## 3 CHINA, PEOPLES REPUBLIC OF 2661
## 4 COLOMBIA 3620
## 5 CUBA 3147
## 6 ECUADOR 3262
## 7 EL SALVADOR 3787
## 8 GUATEMALA 4394
## 9 HAITI 2565
## 10 HONDURAS 4253
## # ℹ 12 more rows
# fct_lump_prop(): lump every citizenship that makes up at most 3% of the
# rows into a single catch-all level. The data already has an "OTHER"
# level, so reuse that name for the lumped level; the default "Other"
# would leave two separate catch-all categories ("OTHER" and "Other").
MyData %>%
  mutate(
    citizenship_lump = fct_lump_prop(
      citizenship,
      prop = 0.03,
      other_level = "OTHER"
    )
  ) %>%
  distinct(citizenship_lump)
## # A tibble: 18 × 1
## citizenship_lump
## <fct>
## 1 BRAZIL
## 2 CANADA
## 3 CHINA, PEOPLES REPUBLIC OF
## 4 OTHER
## 5 RUSSIA
## 6 COLOMBIA
## 7 GUATEMALA
## 8 HAITI
## 9 INDIA
## 10 MEXICO
## 11 CUBA
## 12 EL SALVADOR
## 13 UKRAINE
## 14 VENEZUELA
## 15 HONDURAS
## 16 PERU
## 17 ECUADOR
## 18 NICARAGUA
No need to do anything here.