# excel file
data <- read_excel("../00_data/MyData-Charts.xlsx")
data
## # A tibble: 1,222 × 11
## year months state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 2015 January-… Alab… 7000 7000 1800 26 2800 250 4
## 2 2015 January-… Ariz… 35000 35000 4600 13 3400 2100 6
## 3 2015 January-… Arka… 13000 14000 1500 11 1200 90 1
## 4 2015 January-… Cali… 1440000 1690000 255000 15 250000 124000 7
## 5 2015 January-… Colo… 3500 12500 1500 12 200 140 1
## 6 2015 January-… Conn… 3900 3900 870 22 290 NA NA
## 7 2015 January-… Flor… 305000 315000 42000 13 54000 25000 8
## 8 2015 January-… Geor… 104000 105000 14500 14 47000 9500 9
## 9 2015 January-… Hawa… 10500 10500 380 4 3400 760 7
## 10 2015 January-… Idaho 81000 88000 3700 4 2600 8000 9
## # … with 1,212 more rows, 1 more variable: `Growth of colonies` <dbl>, and
## # abbreviated variable names ¹colony_n, ²colony_max, ³colony_lost,
## # ⁴colony_lost_pct, ⁵colony_added, ⁶colony_reno, ⁷colony_reno_pct
skimr::skim(data)
Name | data |
Number of rows | 1222 |
Number of columns | 11 |
_______________________ | |
Column type frequency: | |
character | 6 |
numeric | 5 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
months | 0 | 1 | 10 | 16 | 0 | 4 | 0 |
state | 0 | 1 | 4 | 14 | 0 | 47 | 0 |
colony_max | 0 | 1 | 2 | 7 | 0 | 279 | 0 |
colony_added | 0 | 1 | 2 | 6 | 0 | 257 | 0 |
colony_reno | 0 | 1 | 2 | 6 | 0 | 253 | 0 |
colony_reno_pct | 0 | 1 | 1 | 2 | 0 | 55 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
year | 0 | 1.00 | 2017.77 | 1.89 | 2015 | 2016 | 2018 | 2019 | 2021 | ▇▃▃▃▆ |
colony_n | 47 | 0.96 | 123578.04 | 437835.18 | 1300 | 8000 | 17500 | 55500 | 3181180 | ▇▁▁▁▁ |
colony_lost | 47 | 0.96 | 16548.43 | 60545.13 | 6 | 950 | 2200 | 6500 | 502350 | ▇▁▁▁▁ |
colony_lost_pct | 54 | 0.96 | 11.38 | 7.23 | 1 | 6 | 10 | 15 | 52 | ▇▅▁▁▁ |
Growth of colonies | 83 | 0.93 | 246.57 | 41054.97 | -377690 | -2200 | -430 | 1615 | 455650 | ▁▁▇▁▁ |
set.seed(123)
data_missing <- data %>%
select(state, colony_reno, colony_reno_pct) %>%
sample_n(10)
data_small <- data %>%
select(year, state, colony_lost) %>%
slice(1:10)
uniting <- data %>%
unite(col = "Loss_of_colonies", c(colony_lost,colony_lost_pct), sep = "/", )
data_wide <- data %>%
pivot_wider(names_from = state, values_from = colony_lost)
data_wide
## # A tibble: 1,176 × 56
## year months colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ Growt…⁷ Alabama
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2015 Januar… 7000 7000 26 2800 250 4 1000 1800
## 2 2015 Januar… 35000 35000 13 3400 2100 6 -1200 NA
## 3 2015 Januar… 13000 14000 11 1200 90 1 -300 NA
## 4 2015 Januar… 1440000 1690000 15 250000 124000 7 -5000 NA
## 5 2015 Januar… 3500 12500 12 200 140 1 -1300 NA
## 6 2015 Januar… 3900 3900 22 290 NA NA -580 NA
## 7 2015 Januar… 305000 315000 13 54000 25000 8 12000 NA
## 8 2015 Januar… 104000 105000 14 47000 9500 9 32500 NA
## 9 2015 Januar… 10500 10500 4 3400 760 7 3020 NA
## 10 2015 Januar… 81000 88000 4 2600 8000 9 -1100 NA
## # … with 1,166 more rows, 46 more variables: Arizona <dbl>, Arkansas <dbl>,
## # California <dbl>, Colorado <dbl>, Connecticut <dbl>, Florida <dbl>,
## # Georgia <dbl>, Hawaii <dbl>, Idaho <dbl>, Illinois <dbl>, Indiana <dbl>,
## # Iowa <dbl>, Kansas <dbl>, Kentucky <dbl>, Louisiana <dbl>, Maine <dbl>,
## # Maryland <dbl>, Massachusetts <dbl>, Michigan <dbl>, Minnesota <dbl>,
## # Mississippi <dbl>, Missouri <dbl>, Montana <dbl>, Nebraska <dbl>,
## # `New Jersey` <dbl>, `New Mexico` <dbl>, `New York` <dbl>, …
data_wide %>%
pivot_longer(cols = `Alabama`:`Wyoming`, names_to = "states", values_to = "colony_lost", values_drop_na = TRUE)
## # A tibble: 1,125 × 13
## year months colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ Growt…⁷ Other…⁸
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2015 Januar… 7000 7000 26 2800 250 4 1000 NA
## 2 2015 Januar… 35000 35000 13 3400 2100 6 -1200 NA
## 3 2015 Januar… 13000 14000 11 1200 90 1 -300 NA
## 4 2015 Januar… 1440000 1690000 15 250000 124000 7 -5000 NA
## 5 2015 Januar… 3500 12500 12 200 140 1 -1300 NA
## 6 2015 Januar… 3900 3900 22 290 NA NA -580 NA
## 7 2015 Januar… 305000 315000 13 54000 25000 8 12000 NA
## 8 2015 Januar… 104000 105000 14 47000 9500 9 32500 NA
## 9 2015 Januar… 10500 10500 4 3400 760 7 3020 NA
## 10 2015 Januar… 81000 88000 4 2600 8000 9 -1100 NA
## # … with 1,115 more rows, 3 more variables: `United States` <dbl>,
## # states <chr>, colony_lost <dbl>, and abbreviated variable names ¹colony_n,
## # ²colony_max, ³colony_lost_pct, ⁴colony_added, ⁵colony_reno,
## # ⁶colony_reno_pct, ⁷`Growth of colonies`, ⁸`Other States`
uniting a column
Dataspec <- data_small %>%
unite(col = "State_Loss", c(state, colony_lost), sep = "/")
Dataspec
## # A tibble: 10 × 2
## year State_Loss
## <dbl> <chr>
## 1 2015 Alabama/1800
## 2 2015 Arizona/4600
## 3 2015 Arkansas/1500
## 4 2015 California/255000
## 5 2015 Colorado/1500
## 6 2015 Connecticut/870
## 7 2015 Florida/42000
## 8 2015 Georgia/14500
## 9 2015 Hawaii/380
## 10 2015 Idaho/3700
Dataspec %>%
separate(State_Loss, into = c("state", "colony_lost"))
## # A tibble: 10 × 3
## year state colony_lost
## <dbl> <chr> <chr>
## 1 2015 Alabama 1800
## 2 2015 Arizona 4600
## 3 2015 Arkansas 1500
## 4 2015 California 255000
## 5 2015 Colorado 1500
## 6 2015 Connecticut 870
## 7 2015 Florida 42000
## 8 2015 Georgia 14500
## 9 2015 Hawaii 380
## 10 2015 Idaho 3700
data_wide <- data %>%
pivot_wider(names_from = state, values_from = colony_lost)
data_wide
## # A tibble: 1,176 × 56
## year months colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ Growt…⁷ Alabama
## <dbl> <chr> <dbl> <chr> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2015 Januar… 7000 7000 26 2800 250 4 1000 1800
## 2 2015 Januar… 35000 35000 13 3400 2100 6 -1200 NA
## 3 2015 Januar… 13000 14000 11 1200 90 1 -300 NA
## 4 2015 Januar… 1440000 1690000 15 250000 124000 7 -5000 NA
## 5 2015 Januar… 3500 12500 12 200 140 1 -1300 NA
## 6 2015 Januar… 3900 3900 22 290 NA NA -580 NA
## 7 2015 Januar… 305000 315000 13 54000 25000 8 12000 NA
## 8 2015 Januar… 104000 105000 14 47000 9500 9 32500 NA
## 9 2015 Januar… 10500 10500 4 3400 760 7 3020 NA
## 10 2015 Januar… 81000 88000 4 2600 8000 9 -1100 NA
## # … with 1,166 more rows, 46 more variables: Arizona <dbl>, Arkansas <dbl>,
## # California <dbl>, Colorado <dbl>, Connecticut <dbl>, Florida <dbl>,
## # Georgia <dbl>, Hawaii <dbl>, Idaho <dbl>, Illinois <dbl>, Indiana <dbl>,
## # Iowa <dbl>, Kansas <dbl>, Kentucky <dbl>, Louisiana <dbl>, Maine <dbl>,
## # Maryland <dbl>, Massachusetts <dbl>, Michigan <dbl>, Minnesota <dbl>,
## # Mississippi <dbl>, Missouri <dbl>, Montana <dbl>, Nebraska <dbl>,
## # `New Jersey` <dbl>, `New Mexico` <dbl>, `New York` <dbl>, …