colony <- read_excel("../00_data/myData.xlsx")
set.seed(123)
colony_small <- colony %>%
sample_n(10) %>%
select(year, state, colony_lost, colony_added)
colony_small
## # A tibble: 10 × 4
## year state colony_lost colony_added
## <dbl> <chr> <dbl> <chr>
## 1 2017 Utah 2700 2900
## 2 2017 Vermont 170 390
## 3 2015 Texas 25000 13000
## 4 2017 Hawaii 130 970
## 5 2016 Florida 45000 36000
## 6 2019 Wyoming 3300 100
## 7 2021 Kansas 1400 2300
## 8 2020 California 69000 61000
## 9 2018 Florida 30000 53000
## 10 2018 Texas 22000 118000
colony_small %>% count(state, colony_lost)
## # A tibble: 10 × 3
## state colony_lost n
## <chr> <dbl> <int>
## 1 California 69000 1
## 2 Florida 30000 1
## 3 Florida 45000 1
## 4 Hawaii 130 1
## 5 Kansas 1400 1
## 6 Texas 22000 1
## 7 Texas 25000 1
## 8 Utah 2700 1
## 9 Vermont 170 1
## 10 Wyoming 3300 1
colony_small %>% count(state, colony_lost) %>% filter(n > 1)
## # A tibble: 0 × 3
## # … with 3 variables: state <chr>, colony_lost <dbl>, n <int>
Divide it using dplyr::select in a way the two have a common variable, which you could use to join the two.
colony_year.state <- colony_small %>% select(year:colony_added)
colony_year.state
## # A tibble: 10 × 4
## year state colony_lost colony_added
## <dbl> <chr> <dbl> <chr>
## 1 2017 Utah 2700 2900
## 2 2017 Vermont 170 390
## 3 2015 Texas 25000 13000
## 4 2017 Hawaii 130 970
## 5 2016 Florida 45000 36000
## 6 2019 Wyoming 3300 100
## 7 2021 Kansas 1400 2300
## 8 2020 California 69000 61000
## 9 2018 Florida 30000 53000
## 10 2018 Texas 22000 118000
colony_state.lost <- colony_small %>% select(colony_added:state:colony_lost)
## Warning in x:y: numerical expression has 3 elements: only the first used
colony_state.lost
## # A tibble: 10 × 2
## colony_added colony_lost
## <chr> <dbl>
## 1 2900 2700
## 2 390 170
## 3 13000 25000
## 4 970 130
## 5 36000 45000
## 6 100 3300
## 7 2300 1400
## 8 61000 69000
## 9 53000 30000
## 10 118000 22000
Use tidyr::left_join or other joining functions.
left_join(colony_year.state, colony_state.lost)
## Joining with `by = join_by(colony_lost, colony_added)`
## # A tibble: 10 × 4
## year state colony_lost colony_added
## <dbl> <chr> <dbl> <chr>
## 1 2017 Utah 2700 2900
## 2 2017 Vermont 170 390
## 3 2015 Texas 25000 13000
## 4 2017 Hawaii 130 970
## 5 2016 Florida 45000 36000
## 6 2019 Wyoming 3300 100
## 7 2021 Kansas 1400 2300
## 8 2020 California 69000 61000
## 9 2018 Florida 30000 53000
## 10 2018 Texas 22000 118000
x <- c("state", "months", "colony_lost")
str_detect(x, "s")
## [1] TRUE TRUE TRUE
str_extract(x, "state")
## [1] "state" NA NA
str_replace(x, "months", "NA")
## [1] "state" "NA" "colony_lost"