# Load the source workbook; the later steps all read from `bee_colonies`.
bee_colonies <- read_excel(path = "../00_data/MyData3.xlsx")
Unordered factor levels
# Mean of `colony_lost` within each `months` period, ignoring missing values
colonylost_by_period <- bee_colonies %>%
  group_by(months) %>%
  summarise(avg_colonylost = mean(colony_lost, na.rm = TRUE))

# Show the summary table
colonylost_by_period
## # A tibble: 4 × 2
## months avg_colonylost
## <chr> <dbl>
## 1 April-June 11963.
## 2 January-March 17604.
## 3 July-September 17536.
## 4 October-December 18927.
# Dot plot of the period averages. `months` is a character column, so the
# y axis is laid out in the default (alphabetical) order of its values.
ggplot(colonylost_by_period, aes(x = avg_colonylost, y = months)) +
  geom_point()
Ordered factor levels
# Same dot plot, with the periods on the y axis sorted by their average loss
ggplot(
  colonylost_by_period,
  aes(
    x = avg_colonylost,
    y = fct_reorder(months, avg_colonylost)
  )
) +
  geom_point() +
  # Drop the y-axis title and give the x axis a descriptive one
  labs(x = "Mean colony lost each period", y = NULL)
# List the unique values of `months` before recoding them
distinct(bee_colonies, months)
## # A tibble: 4 × 1
## months
## <chr>
## 1 January-March
## 2 April-June
## 3 July-September
## 4 October-December
# Recode: relabel the "January-March" level as "Spring" in a new factor column,
# then show only the relabelled rows next to the original `months` value.
bee_colonies %>%
  mutate(period_rev = fct_recode(months, Spring = "January-March")) %>%
  filter(period_rev == "Spring") %>%
  select(months, period_rev)
## # A tibble: 329 × 2
## months period_rev
## <chr> <fct>
## 1 January-March Spring
## 2 January-March Spring
## 3 January-March Spring
## 4 January-March Spring
## 5 January-March Spring
## 6 January-March Spring
## 7 January-March Spring
## 8 January-March Spring
## 9 January-March Spring
## 10 January-March Spring
## # ℹ 319 more rows
# Collapse: merge the two levels "Other States" and "United States" into a
# single "Unknown state" level, keeping the original column for comparison.
bee_colonies %>%
  mutate(
    state_col = fct_collapse(
      state,
      `Unknown state` = c("Other States", "United States")
    )
  ) %>%
  select(state, state_col)
## # A tibble: 1,222 × 2
## state state_col
## <chr> <fct>
## 1 Alabama Alabama
## 2 Arizona Arizona
## 3 Arkansas Arkansas
## 4 California California
## 5 Colorado Colorado
## 6 Connecticut Connecticut
## 7 Florida Florida
## 8 Georgia Georgia
## 9 Hawaii Hawaii
## 10 Idaho Idaho
## # ℹ 1,212 more rows
# Lump small levels into other levels: first check how many rows each state has
count(bee_colonies, state)
## # A tibble: 47 × 2
## state n
## <chr> <int>
## 1 Alabama 26
## 2 Arizona 26
## 3 Arkansas 26
## 4 California 26
## 5 Colorado 26
## 6 Connecticut 26
## 7 Florida 26
## 8 Georgia 26
## 9 Hawaii 26
## 10 Idaho 26
## # ℹ 37 more rows
# Lump the least frequent states into "Other" and list the resulting levels.
# `fct_lump()` is superseded in forcats; called with neither `n` nor `prop` it
# delegates to `fct_lump_lowfreq()`, so that function is used directly.
bee_colonies %>%
  mutate(state_lump = fct_lump_lowfreq(state)) %>%
  distinct(state_lump)
## # A tibble: 47 × 1
## state_lump
## <fct>
## 1 Alabama
## 2 Arizona
## 3 Arkansas
## 4 California
## 5 Colorado
## 6 Connecticut
## 7 Florida
## 8 Georgia
## 9 Hawaii
## 10 Idaho
## # ℹ 37 more rows
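No lumping is needed here: the lumped factor still has 47 distinct values, the same number as there are states, so nothing needs to be combined.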