Import your data

# excel file
colony <- read_excel("../00_data/myData.xlsx")
set.seed(123)

colony_small <- colony %>%
    sample_n(10) %>%
    select(year, state, colony_lost)

colony_small
## # A tibble: 10 × 3
##     year state      colony_lost
##    <dbl> <chr>            <dbl>
##  1  2017 Utah              2700
##  2  2017 Vermont            170
##  3  2015 Texas            25000
##  4  2017 Hawaii             130
##  5  2016 Florida          45000
##  6  2019 Wyoming           3300
##  7  2021 Kansas            1400
##  8  2020 California       69000
##  9  2018 Florida          30000
## 10  2018 Texas            22000

Pivoting

long to wide form

colony_wide <- colony_small %>%
    
    pivot_wider(names_from = year, 
                values_from = colony_lost)

colony_wide
## # A tibble: 8 × 8
##   state      `2017` `2015` `2016` `2019` `2021` `2020` `2018`
##   <chr>       <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 Utah         2700     NA     NA     NA     NA     NA     NA
## 2 Vermont       170     NA     NA     NA     NA     NA     NA
## 3 Texas          NA  25000     NA     NA     NA     NA  22000
## 4 Hawaii        130     NA     NA     NA     NA     NA     NA
## 5 Florida        NA     NA  45000     NA     NA     NA  30000
## 6 Wyoming        NA     NA     NA   3300     NA     NA     NA
## 7 Kansas         NA     NA     NA     NA   1400     NA     NA
## 8 California     NA     NA     NA     NA     NA  69000     NA

wide to long form

colony_wide %>%
    pivot_longer(cols           = `2017`:`2018`, 
                 names_to       = "year", 
                 values_to      = "colony_lost", 
                 values_drop_na = TRUE)
## # A tibble: 10 × 3
##    state      year  colony_lost
##    <chr>      <chr>       <dbl>
##  1 Utah       2017         2700
##  2 Vermont    2017          170
##  3 Texas      2015        25000
##  4 Texas      2018        22000
##  5 Hawaii     2017          130
##  6 Florida    2016        45000
##  7 Florida    2018        30000
##  8 Wyoming    2019         3300
##  9 Kansas     2021         1400
## 10 California 2020        69000

Separating and Uniting

Separate a column

colony_small %>% 
  separate(year, into = c("century", "year"), sep = 2)
## # A tibble: 10 × 4
##    century year  state      colony_lost
##    <chr>   <chr> <chr>            <dbl>
##  1 20      17    Utah              2700
##  2 20      17    Vermont            170
##  3 20      15    Texas            25000
##  4 20      17    Hawaii             130
##  5 20      16    Florida          45000
##  6 20      19    Wyoming           3300
##  7 20      21    Kansas            1400
##  8 20      20    California       69000
##  9 20      18    Florida          30000
## 10 20      18    Texas            22000

Unite two columns

colony %>%
    
    unite(col = "state_colony", c(state, colony_lost), sep = "/")
## # A tibble: 1,222 × 9
##     year months        state_c…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
##    <dbl> <chr>         <chr>       <dbl> <chr>     <dbl> <chr>   <chr>   <chr>  
##  1  2015 January-March Alabama/…    7000 7000         26 2800    250     4      
##  2  2015 January-March Arizona/…   35000 35000        13 3400    2100    6      
##  3  2015 January-March Arkansas…   13000 14000        11 1200    90      1      
##  4  2015 January-March Californ… 1440000 1690000      15 250000  124000  7      
##  5  2015 January-March Colorado…    3500 12500        12 200     140     1      
##  6  2015 January-March Connecti…    3900 3900         22 290     NA      NA     
##  7  2015 January-March Florida/…  305000 315000       13 54000   25000   8      
##  8  2015 January-March Georgia/…  104000 105000       14 47000   9500    9      
##  9  2015 January-March Hawaii/3…   10500 10500         4 3400    760     7      
## 10  2015 January-March Idaho/37…   81000 88000         4 2600    8000    9      
## # … with 1,212 more rows, and abbreviated variable names ¹​state_colony,
## #   ²​colony_n, ³​colony_max, ⁴​colony_lost_pct, ⁵​colony_added, ⁶​colony_reno,
## #   ⁷​colony_reno_pct

Missing Values

colony_small %>%
    
    complete(year, state)
## # A tibble: 56 × 3
##     year state      colony_lost
##    <dbl> <chr>            <dbl>
##  1  2015 California          NA
##  2  2015 Florida             NA
##  3  2015 Hawaii              NA
##  4  2015 Kansas              NA
##  5  2015 Texas            25000
##  6  2015 Utah                NA
##  7  2015 Vermont             NA
##  8  2015 Wyoming             NA
##  9  2016 California          NA
## 10  2016 Florida          45000
## # … with 46 more rows