# Load the colony data set from the project's data folder.
colony <- read_excel(path = "../00_data/myData.xlsx")
# Quarter labels used for the reporting periods, in calendar order.
months <- c(
  "January-March",
  "April-June",
  "July-September",
  "October-December"
)
months
## [1] "January-March" "April-June" "July-September" "October-December"

# factor() turns every value that is not listed in `levels` into NA, so the
# levels must be the quarter labels themselves (not single month names).
# Listing them in calendar order also fixes the ordering of the factor.
month_levels <- c(
  "January-March",
  "April-June",
  "July-September",
  "October-December"
)
colony_months <- factor(months, levels = month_levels)
colony_months
## [1] January-March April-June July-September October-December
## Levels: January-March April-June July-September October-December
# Fix the RNG seed so the same rows are drawn on every run.
set.seed(123)

# Draw nine random rows and keep only the columns used in the examples below.
colony_small <- colony %>%
  sample_n(size = 9) %>%
  select(year, state, colony_lost, colony_added)

colony_small
## # A tibble: 9 × 4
## year state colony_lost colony_added
## <dbl> <chr> <dbl> <chr>
## 1 2017 Utah 2700 2900
## 2 2017 Vermont 170 390
## 3 2015 Texas 25000 13000
## 4 2017 Hawaii 130 970
## 5 2016 Florida 45000 36000
## 6 2019 Wyoming 3300 100
## 7 2021 Kansas 1400 2300
## 8 2020 California 69000 61000
## 9 2018 Florida 30000 53000
# Per-state means of colonies added and lost, plus the number of rows per state.
# `colony_added` is stored as character (see the <chr> column type in the
# colony_small printout), and mean() on a character vector returns NA with a
# warning, so it is converted to numeric before averaging. `colony_lost` is
# passed through as.numeric() as well so both columns are handled the same way;
# for an already-numeric column this is a no-op.
loss_summary <- colony_small %>%
  group_by(state) %>%
  summarise(
    colony_added = mean(as.numeric(colony_added), na.rm = TRUE),
    colony_lost = mean(as.numeric(colony_lost), na.rm = TRUE),
    n = n()
  )
# Mean colonies lost per state, with states in the default axis order.
ggplot(data = loss_summary, mapping = aes(x = colony_lost, y = state)) +
  geom_point()

# Same plot, with the state axis reordered by colony_lost via fct_reorder().
ggplot(
  data = loss_summary,
  mapping = aes(x = colony_lost, y = fct_reorder(state, colony_lost))
) +
  geom_point()
# List the unique values of the `months` column.
distinct(colony, months)
## # A tibble: 4 × 1
## months
## <chr>
## 1 January-March
## 2 April-June
## 3 July-September
## 4 October-December
# Relabel the "January-March" level as "Jan, Feb, Mar" in a new factor column,
# then show the original and recoded columns side by side for that quarter.
colony %>%
  mutate(
    Jan_group = fct_recode(months, "Jan, Feb, Mar" = "January-March")
  ) %>%
  select(months, Jan_group) %>%
  filter(months == "January-March")
## # A tibble: 329 × 2
## months Jan_group
## <chr> <fct>
## 1 January-March Jan, Feb, Mar
## 2 January-March Jan, Feb, Mar
## 3 January-March Jan, Feb, Mar
## 4 January-March Jan, Feb, Mar
## 5 January-March Jan, Feb, Mar
## 6 January-March Jan, Feb, Mar
## 7 January-March Jan, Feb, Mar
## 8 January-March Jan, Feb, Mar
## 9 January-March Jan, Feb, Mar
## 10 January-March Jan, Feb, Mar
## # … with 319 more rows
# Collapse Texas and Florida into a single "Southern state" level; every other
# state keeps its own level. Show the original and collapsed columns together.
colony_small %>%
  mutate(
    state_col = fct_collapse(
      state,
      "Southern state" = c("Texas", "Florida")
    )
  ) %>%
  select(state, state_col)
## # A tibble: 9 × 2
## state state_col
## <chr> <fct>
## 1 Utah Utah
## 2 Vermont Vermont
## 3 Texas Southern state
## 4 Hawaii Hawaii
## 5 Florida Southern state
## 6 Wyoming Wyoming
## 7 Kansas Kansas
## 8 California California
## 9 Florida Southern state
# Number of sampled rows per state.
count(colony_small, state)
## # A tibble: 8 × 2
## state n
## <chr> <int>
## 1 California 1
## 2 Florida 2
## 3 Hawaii 1
## 4 Kansas 1
## 5 Texas 1
## 6 Utah 1
## 7 Vermont 1
## 8 Wyoming 1
# Apply fct_lump() with its default settings and list the resulting levels.
# In the printed result that follows, every state still appears under its own
# name.
colony_small %>%
  mutate(state_lump = fct_lump(state)) %>%
  distinct(state_lump)
## # A tibble: 8 × 1
## state_lump
## <fct>
## 1 Utah
## 2 Vermont
## 3 Texas
## 4 Hawaii
## 5 Florida
## 6 Wyoming
## 7 Kansas
## 8 California
No need to do anything here.