Import your data
# excel file
colony <- read_excel("../00_data/myData.xlsx")
set.seed(123)
colony_small <- colony %>%
sample_n(10) %>%
select(year, state, colony_lost)
colony_small
## # A tibble: 10 × 3
## year state colony_lost
## <dbl> <chr> <dbl>
## 1 2017 Utah 2700
## 2 2017 Vermont 170
## 3 2015 Texas 25000
## 4 2017 Hawaii 130
## 5 2016 Florida 45000
## 6 2019 Wyoming 3300
## 7 2021 Kansas 1400
## 8 2020 California 69000
## 9 2018 Florida 30000
## 10 2018 Texas 22000
Separating and Uniting
Separate a column
colony_small %>%
separate(year, into = c("century", "year"), sep = 2)
## # A tibble: 10 × 4
## century year state colony_lost
## <chr> <chr> <chr> <dbl>
## 1 20 17 Utah 2700
## 2 20 17 Vermont 170
## 3 20 15 Texas 25000
## 4 20 17 Hawaii 130
## 5 20 16 Florida 45000
## 6 20 19 Wyoming 3300
## 7 20 21 Kansas 1400
## 8 20 20 California 69000
## 9 20 18 Florida 30000
## 10 20 18 Texas 22000
Unite two columns
colony %>%
unite(col = "state_colony", c(state, colony_lost), sep = "/")
## # A tibble: 1,222 × 9
## year months state_c…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <chr> <chr> <chr>
## 1 2015 January-March Alabama/… 7000 7000 26 2800 250 4
## 2 2015 January-March Arizona/… 35000 35000 13 3400 2100 6
## 3 2015 January-March Arkansas… 13000 14000 11 1200 90 1
## 4 2015 January-March Californ… 1440000 1690000 15 250000 124000 7
## 5 2015 January-March Colorado… 3500 12500 12 200 140 1
## 6 2015 January-March Connecti… 3900 3900 22 290 NA NA
## 7 2015 January-March Florida/… 305000 315000 13 54000 25000 8
## 8 2015 January-March Georgia/… 104000 105000 14 47000 9500 9
## 9 2015 January-March Hawaii/3… 10500 10500 4 3400 760 7
## 10 2015 January-March Idaho/37… 81000 88000 4 2600 8000 9
## # … with 1,212 more rows, and abbreviated variable names ¹state_colony,
## # ²colony_n, ³colony_max, ⁴colony_lost_pct, ⁵colony_added, ⁶colony_reno,
## # ⁷colony_reno_pct
Missing Values
colony_small %>%
complete(year, state)
## # A tibble: 56 × 3
## year state colony_lost
## <dbl> <chr> <dbl>
## 1 2015 California NA
## 2 2015 Florida NA
## 3 2015 Hawaii NA
## 4 2015 Kansas NA
## 5 2015 Texas 25000
## 6 2015 Utah NA
## 7 2015 Vermont NA
## 8 2015 Wyoming NA
## 9 2016 California NA
## 10 2016 Florida 45000
## # … with 46 more rows