# Load the bee colony workbook from the project's data folder.
bee_colonies <- read_excel("../00_data/MyData3.xlsx")

# Fix the RNG seed so the random sample below is the same on every run.
set.seed(123)

# Keep only the five columns the later examples use, then draw 10 random rows.
bee_colonies_small <- sample_n(
  select(bee_colonies, year, months, state, colony_size, colony_lost),
  10
)
# Count the sampled rows whose colony_size starts with the digit 2.
# The summary column is left unnamed, so it is labelled with the expression.
summarise(bee_colonies_small, sum(str_detect(colony_size, "^2")))
## # A tibble: 1 × 1
## `sum(str_detect(colony_size, "^2"))`
## <int>
## 1 3

# Same leading-"2" test on colony_lost: one TRUE/FALSE flag per sampled row.
bee_colonies_small$colony_lost %>%
  str_detect("^2")
## [1] TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE

# Number of rows flagged TRUE.
bee_colonies_small$colony_lost %>%
  str_detect("^2") %>%
  sum()
## [1] 3

# Share of rows flagged TRUE (the mean of a logical vector).
bee_colonies_small$colony_lost %>%
  str_detect("^2") %>%
  mean()
## [1] 0.3
# Build one alternation pattern ("A|B|C|...") from a vector of state names.
states <- c("Vermont", "Texas", "California", "Florida", "Utah")
state_match <- states %>% str_c(collapse = "|")
state_match
## [1] "Vermont|Texas|California|Florida|Utah"

# Keep the elements that contain any of the state names, then pull out the
# matched name from each kept element.
# NOTE(review): the pattern is applied to the same vector it was built from,
# so every element matches itself; presumably this was meant to run against
# the state column of the data - confirm.
pattern_matching_states <- states %>% str_subset(pattern = state_match)
matches <- pattern_matching_states %>% str_extract(pattern = state_match)
matches %>% head()
## [1] "Vermont" "Texas" "California" "Florida" "Utah"
# Replace the leading non-zero digit of colony_lost with "-" and keep the
# result in a named column next to the original values.
#
# - The pattern uses `[1-9]`. The earlier class `[1-9999]` is not a numeric
#   range: inside brackets it is the range 1-9 followed by three literal 9s,
#   i.e. exactly the same set of single digits, so the replaced values are
#   unchanged.
# - colony_lost is numeric (<dbl> below); str_replace() operates on its
#   character form, so the new column is <chr>.
# - Naming the column (`lost_masked`) avoids a header made of the whole
#   expression text, which tibble had to truncate and footnote.
bee_colonies_small %>%
  mutate(lost_masked = str_replace(colony_lost, "^[1-9]", "-"))
## # A tibble: 10 × 6
## year months state colony_size colony_lost lost_masked
## <dbl> <chr> <chr> <dbl> <dbl> <chr>
## 1 2017 January-March Utah 16000 2700 -700
## 2 2017 April-June Vermont 6000 170 -70
## 3 2015 October-December Texas 125000 25000 -5000
## 4 2017 October-December Hawaii 15000 130 -30
## 5 2016 January-March Florida 245000 45000 -5000
## 6 2019 October-December Wyoming 27000 3300 -300
## 7 2021 January-March Kansas 5000 1400 -400
## 8 2020 July-September California 640000 69000 -9000
## 9 2018 July-September Florida 197000 30000 -0000
## 10 2018 January-March Texas 205000 22000 -2000
# Split "Start: End" month ranges into their two parts.
# NOTE(review): `months` is also the name of a base R function and of a column
# in bee_colonies_small; the variable name is kept so later references to it
# keep working.
months <- c("January: March", "April: June", "October: December")

# n = 2 caps each split at two pieces; simplify = TRUE returns a character
# matrix with one row per input string instead of a list.
months %>%
  str_split(": ", n = 2, simplify = TRUE)
## [,1] [,2]
## [1,] "January" "March"
## [2,] "April" "June"
## [3,] "October" "December"