Import data
# excel file
data <- read_excel("../00_data/MyData-Charts.xlsx")
data
## # A tibble: 1,222 × 11
## year months state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 2015 January-… Alab… 7000 7000 1800 26 2800 250 4
## 2 2015 January-… Ariz… 35000 35000 4600 13 3400 2100 6
## 3 2015 January-… Arka… 13000 14000 1500 11 1200 90 1
## 4 2015 January-… Cali… 1440000 1690000 255000 15 250000 124000 7
## 5 2015 January-… Colo… 3500 12500 1500 12 200 140 1
## 6 2015 January-… Conn… 3900 3900 870 22 290 NA NA
## 7 2015 January-… Flor… 305000 315000 42000 13 54000 25000 8
## 8 2015 January-… Geor… 104000 105000 14500 14 47000 9500 9
## 9 2015 January-… Hawa… 10500 10500 380 4 3400 760 7
## 10 2015 January-… Idaho 81000 88000 3700 4 2600 8000 9
## # … with 1,212 more rows, 1 more variable: `Growth of colonies` <dbl>, and
## # abbreviated variable names ¹colony_n, ²colony_max, ³colony_lost,
## # ⁴colony_lost_pct, ⁵colony_added, ⁶colony_reno, ⁷colony_reno_pct
Apply the following dplyr verbs to your data
Filter rows
filter(data, year == "2020", months == "January-March")
## # A tibble: 47 × 11
## year months state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 2020 January-… Alab… 5500 8000 410 5 1000 780 10
## 2 2020 January-… Ariz… 29000 32000 6500 20 150 120 NA
## 3 2020 January-… Arka… 16500 17500 1200 7 700 30 NA
## 4 2020 January-… Cali… 1350000 1710000 230000 13 240000 66000 4
## 5 2020 January-… Colo… 4500 16000 650 4 850 500 3
## 6 2020 January-… Conn… 3700 4100 330 8 60 NA NA
## 7 2020 January-… Flor… 295000 310000 30000 10 41000 8000 3
## 8 2020 January-… Geor… 115000 121000 14500 12 32000 22000 18
## 9 2020 January-… Hawa… 16000 16000 1500 9 1000 2200 14
## 10 2020 January-… Idaho 156000 168000 18500 11 4400 920 1
## # … with 37 more rows, 1 more variable: `Growth of colonies` <dbl>, and
## # abbreviated variable names ¹colony_n, ²colony_max, ³colony_lost,
## # ⁴colony_lost_pct, ⁵colony_added, ⁶colony_reno, ⁷colony_reno_pct
Arrange rows
arrange(data, desc(colony_lost_pct))
## # A tibble: 1,222 × 11
## year months state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 2018 January-… New … 7500 7500 3900 52 180 NA NA
## 2 2015 January-… Ohio 18000 22000 10500 48 5000 3300 15
## 3 2016 January-… Okla… 6000 6000 2900 48 60 510 9
## 4 2020 April-Ju… Alab… 8500 8500 4100 48 4700 2620 31
## 5 2017 January-… Mass… 3300 3300 1500 45 120 110 3
## 6 2015 January-… Mary… 7500 10000 4100 41 1600 870 9
## 7 2015 January-… Illi… 6000 10500 4200 40 2300 390 4
## 8 2015 October-… Kans… 8500 8500 3400 40 50 20 NA
## 9 2015 January-… Kent… 7500 10500 4100 39 250 NA NA
## 10 2017 January-… New … 5500 5500 2100 38 3600 NA NA
## # … with 1,212 more rows, 1 more variable: `Growth of colonies` <dbl>, and
## # abbreviated variable names ¹colony_n, ²colony_max, ³colony_lost,
## # ⁴colony_lost_pct, ⁵colony_added, ⁶colony_reno, ⁷colony_reno_pct
Select columns
select(data, state, colony_lost_pct, colony_added, starts_with("abc"))
## # A tibble: 1,222 × 3
## state colony_lost_pct colony_added
## <chr> <dbl> <chr>
## 1 Alabama 26 2800
## 2 Arizona 13 3400
## 3 Arkansas 11 1200
## 4 California 15 250000
## 5 Colorado 12 200
## 6 Connecticut 22 290
## 7 Florida 13 54000
## 8 Georgia 14 47000
## 9 Hawaii 4 3400
## 10 Idaho 4 2600
## # … with 1,212 more rows
Add columns
plot_data <- data %>% group_by(year, state) %>% summarise(avg_pct = mean(colony_lost_pct, na.rm = TRUE)) %>% filter(year == 2021) %>% arrange(-avg_pct) %>% slice(1:10)
plot_data %>% ggplot(aes(x = avg_pct, y = fct_reorder(state, avg_pct))) + geom_col()

Sumarise
summarise(data, delay = mean(colony_lost_pct, na.rm = TRUE))
## # A tibble: 1 × 1
## delay
## <dbl>
## 1 11.4
Sumarise by Group
by_day <- group_by(data, year, months, state)
summarise(by_day, delay = mean(colony_lost_pct, na.rm = TRUE))
## # A tibble: 1,222 × 4
## # Groups: year, months [26]
## year months state delay
## <dbl> <chr> <chr> <dbl>
## 1 2015 April-June Alabama 12
## 2 2015 April-June Arizona 17
## 3 2015 April-June Arkansas 14
## 4 2015 April-June California 10
## 5 2015 April-June Colorado 8
## 6 2015 April-June Connecticut 3
## 7 2015 April-June Florida 16
## 8 2015 April-June Georgia 19
## 9 2015 April-June Hawaii 1
## 10 2015 April-June Idaho 9
## # … with 1,212 more rows