# Read the CSV at a path relative to the working directory into `myData`.
# The column-type summary echoed below is what readr prints when no
# `col_types` are supplied.
myData <- readr::read_csv(file = "../00_data/myData.csv")
## Rows: 1222 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): months, state
## dbl (8): year, colony_n, colony_max, colony_lost, colony_lost_pct, colony_ad...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#data %>% count(colony_reno_pct, colony_lost_pct) %>% filter(n > 10)
Divide the data into two tables using dplyr::select so that both share a common variable, which you can then use to join them.
#colony_1sthalf <- data %>% select(year:colony_lost)
#colony_2ndhalf <- data %>% select(colony_lost:colony_reno_pct)
Use dplyr::left_join or other joining functions.
#left_join(colony_1sthalf, colony_2ndhalf)
#data %>%
# summarise(sum(str_detect(colony_lost_pct, "4$")))
#str_detect(data$colony_lost_pct,"4$")
#sum(str_detect(data$colony_lost_pct,"4$"))
# State names to match. Their order is kept as-is because it determines the
# order of the alternatives in the string built below.
# NOTE(review): the result is named `state_new_eng`, but this list contains
# Pennsylvania, New York and New Jersey (not New England states) and omits
# Rhode Island -- confirm the intended region.
states <- c(
  "Connecticut",
  "Massachusetts",
  "Pennsylvania",
  "New York",
  "New Jersey",
  "New Hampshire",
  "Vermont",
  "Maine"
)

# Collapse the names into a single string separated by "|"; presumably meant
# to be used as a regex alternation that matches any one of the names.
state_new_eng <- str_c(states, collapse = "|")
#data %>% mutate(colony_lost = colony_lost_pct %>% str_replace("[A-Z]", "-"))
#data %>% mutate(colony_lost = colony_lost_pct %>% str_replace_all("[A-Z]", "-"))