Import your data

# excel file
data <- read_excel("../00_data/MyData-Charts.xlsx")
data
## # A tibble: 1,222 × 11
##     year months    state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
##    <dbl> <chr>     <chr>   <dbl> <chr>     <dbl>   <dbl> <chr>   <chr>   <chr>  
##  1  2015 January-… Alab…    7000 7000       1800      26 2800    250     4      
##  2  2015 January-… Ariz…   35000 35000      4600      13 3400    2100    6      
##  3  2015 January-… Arka…   13000 14000      1500      11 1200    90      1      
##  4  2015 January-… Cali… 1440000 1690000  255000      15 250000  124000  7      
##  5  2015 January-… Colo…    3500 12500      1500      12 200     140     1      
##  6  2015 January-… Conn…    3900 3900        870      22 290     NA      NA     
##  7  2015 January-… Flor…  305000 315000    42000      13 54000   25000   8      
##  8  2015 January-… Geor…  104000 105000    14500      14 47000   9500    9      
##  9  2015 January-… Hawa…   10500 10500       380       4 3400    760     7      
## 10  2015 January-… Idaho   81000 88000      3700       4 2600    8000    9      
## # … with 1,212 more rows, 1 more variable: `Growth of colonies` <dbl>, and
## #   abbreviated variable names ¹​colony_n, ²​colony_max, ³​colony_lost,
## #   ⁴​colony_lost_pct, ⁵​colony_added, ⁶​colony_reno, ⁷​colony_reno_pct
skimr::skim(data)
Data summary
Name data
Number of rows 1222
Number of columns 11
_______________________
Column type frequency:
character 6
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
months 0 1 10 16 0 4 0
state 0 1 4 14 0 47 0
colony_max 0 1 2 7 0 279 0
colony_added 0 1 2 6 0 257 0
colony_reno 0 1 2 6 0 253 0
colony_reno_pct 0 1 1 2 0 55 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2017.77 1.89 2015 2016 2018 2019 2021 ▇▃▃▃▆
colony_n 47 0.96 123578.04 437835.18 1300 8000 17500 55500 3181180 ▇▁▁▁▁
colony_lost 47 0.96 16548.43 60545.13 6 950 2200 6500 502350 ▇▁▁▁▁
colony_lost_pct 54 0.96 11.38 7.23 1 6 10 15 52 ▇▅▁▁▁
Growth of colonies 83 0.93 246.57 41054.97 -377690 -2200 -430 1615 455650 ▁▁▇▁▁
set.seed(123)
data_missing <- data %>%
    select(state, colony_reno, colony_reno_pct) %>%
    sample_n(10)


data_small <- data %>%
    select(year, state, colony_lost) %>%
    slice(1:10)

uniting <- data %>%
    
    unite(col = "Loss_of_colonies", c(colony_lost,colony_lost_pct), sep = "/", )

Pivoting

long to wide form

data_wide <- data %>%
    pivot_wider(names_from = state, values_from = colony_lost)

data_wide
## # A tibble: 1,176 × 56
##     year months  colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ Growt…⁷ Alabama
##    <dbl> <chr>     <dbl> <chr>     <dbl> <chr>   <chr>   <chr>     <dbl>   <dbl>
##  1  2015 Januar…    7000 7000         26 2800    250     4          1000    1800
##  2  2015 Januar…   35000 35000        13 3400    2100    6         -1200      NA
##  3  2015 Januar…   13000 14000        11 1200    90      1          -300      NA
##  4  2015 Januar… 1440000 1690000      15 250000  124000  7         -5000      NA
##  5  2015 Januar…    3500 12500        12 200     140     1         -1300      NA
##  6  2015 Januar…    3900 3900         22 290     NA      NA         -580      NA
##  7  2015 Januar…  305000 315000       13 54000   25000   8         12000      NA
##  8  2015 Januar…  104000 105000       14 47000   9500    9         32500      NA
##  9  2015 Januar…   10500 10500         4 3400    760     7          3020      NA
## 10  2015 Januar…   81000 88000         4 2600    8000    9         -1100      NA
## # … with 1,166 more rows, 46 more variables: Arizona <dbl>, Arkansas <dbl>,
## #   California <dbl>, Colorado <dbl>, Connecticut <dbl>, Florida <dbl>,
## #   Georgia <dbl>, Hawaii <dbl>, Idaho <dbl>, Illinois <dbl>, Indiana <dbl>,
## #   Iowa <dbl>, Kansas <dbl>, Kentucky <dbl>, Louisiana <dbl>, Maine <dbl>,
## #   Maryland <dbl>, Massachusetts <dbl>, Michigan <dbl>, Minnesota <dbl>,
## #   Mississippi <dbl>, Missouri <dbl>, Montana <dbl>, Nebraska <dbl>,
## #   `New Jersey` <dbl>, `New Mexico` <dbl>, `New York` <dbl>, …

wide to long form

data_wide %>%
    pivot_longer(cols = `Alabama`:`Wyoming`, names_to = "states", values_to = "colony_lost", values_drop_na = TRUE)
## # A tibble: 1,125 × 13
##     year months  colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ Growt…⁷ Other…⁸
##    <dbl> <chr>     <dbl> <chr>     <dbl> <chr>   <chr>   <chr>     <dbl>   <dbl>
##  1  2015 Januar…    7000 7000         26 2800    250     4          1000      NA
##  2  2015 Januar…   35000 35000        13 3400    2100    6         -1200      NA
##  3  2015 Januar…   13000 14000        11 1200    90      1          -300      NA
##  4  2015 Januar… 1440000 1690000      15 250000  124000  7         -5000      NA
##  5  2015 Januar…    3500 12500        12 200     140     1         -1300      NA
##  6  2015 Januar…    3900 3900         22 290     NA      NA         -580      NA
##  7  2015 Januar…  305000 315000       13 54000   25000   8         12000      NA
##  8  2015 Januar…  104000 105000       14 47000   9500    9         32500      NA
##  9  2015 Januar…   10500 10500         4 3400    760     7          3020      NA
## 10  2015 Januar…   81000 88000         4 2600    8000    9         -1100      NA
## # … with 1,115 more rows, 3 more variables: `United States` <dbl>,
## #   states <chr>, colony_lost <dbl>, and abbreviated variable names ¹​colony_n,
## #   ²​colony_max, ³​colony_lost_pct, ⁴​colony_added, ⁵​colony_reno,
## #   ⁶​colony_reno_pct, ⁷​`Growth of colonies`, ⁸​`Other States`

Separating and Uniting

Unite two columns

uniting a column

Dataspec <- data_small %>%
    
    unite(col = "State_Loss", c(state, colony_lost), sep = "/")

Dataspec
## # A tibble: 10 × 2
##     year State_Loss       
##    <dbl> <chr>            
##  1  2015 Alabama/1800     
##  2  2015 Arizona/4600     
##  3  2015 Arkansas/1500    
##  4  2015 California/255000
##  5  2015 Colorado/1500    
##  6  2015 Connecticut/870  
##  7  2015 Florida/42000    
##  8  2015 Georgia/14500    
##  9  2015 Hawaii/380       
## 10  2015 Idaho/3700

Separate a column

Dataspec %>% 
  separate(State_Loss, into = c("state", "colony_lost"))
## # A tibble: 10 × 3
##     year state       colony_lost
##    <dbl> <chr>       <chr>      
##  1  2015 Alabama     1800       
##  2  2015 Arizona     4600       
##  3  2015 Arkansas    1500       
##  4  2015 California  255000     
##  5  2015 Colorado    1500       
##  6  2015 Connecticut 870        
##  7  2015 Florida     42000      
##  8  2015 Georgia     14500      
##  9  2015 Hawaii      380        
## 10  2015 Idaho       3700

Missing Values

data_wide <- data %>%
    pivot_wider(names_from = state, values_from = colony_lost)

data_wide
## # A tibble: 1,176 × 56
##     year months  colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ Growt…⁷ Alabama
##    <dbl> <chr>     <dbl> <chr>     <dbl> <chr>   <chr>   <chr>     <dbl>   <dbl>
##  1  2015 Januar…    7000 7000         26 2800    250     4          1000    1800
##  2  2015 Januar…   35000 35000        13 3400    2100    6         -1200      NA
##  3  2015 Januar…   13000 14000        11 1200    90      1          -300      NA
##  4  2015 Januar… 1440000 1690000      15 250000  124000  7         -5000      NA
##  5  2015 Januar…    3500 12500        12 200     140     1         -1300      NA
##  6  2015 Januar…    3900 3900         22 290     NA      NA         -580      NA
##  7  2015 Januar…  305000 315000       13 54000   25000   8         12000      NA
##  8  2015 Januar…  104000 105000       14 47000   9500    9         32500      NA
##  9  2015 Januar…   10500 10500         4 3400    760     7          3020      NA
## 10  2015 Januar…   81000 88000         4 2600    8000    9         -1100      NA
## # … with 1,166 more rows, 46 more variables: Arizona <dbl>, Arkansas <dbl>,
## #   California <dbl>, Colorado <dbl>, Connecticut <dbl>, Florida <dbl>,
## #   Georgia <dbl>, Hawaii <dbl>, Idaho <dbl>, Illinois <dbl>, Indiana <dbl>,
## #   Iowa <dbl>, Kansas <dbl>, Kentucky <dbl>, Louisiana <dbl>, Maine <dbl>,
## #   Maryland <dbl>, Massachusetts <dbl>, Michigan <dbl>, Minnesota <dbl>,
## #   Mississippi <dbl>, Missouri <dbl>, Montana <dbl>, Nebraska <dbl>,
## #   `New Jersey` <dbl>, `New Mexico` <dbl>, `New York` <dbl>, …