Import data
# excel file
beesdata <- read_excel("../03_module6/beesdata.xlsx")
beesdata
## # A tibble: 1,222 × 10
## year months state colony_n colony_max colony_lost colony_lost_pct
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 2015 January-March Alabama 7000 7000 1800 26
## 2 2015 January-March Arizona 35000 35000 4600 13
## 3 2015 January-March Arkansas 13000 14000 1500 11
## 4 2015 January-March Californ… 1440000 1690000 255000 15
## 5 2015 January-March Colorado 3500 12500 1500 12
## 6 2015 January-March Connecti… 3900 3900 870 22
## 7 2015 January-March Florida 305000 315000 42000 13
## 8 2015 January-March Georgia 104000 105000 14500 14
## 9 2015 January-March Hawaii 10500 10500 380 4
## 10 2015 January-March Idaho 81000 88000 3700 4
## # ℹ 1,212 more rows
## # ℹ 3 more variables: colony_added <chr>, colony_reno <chr>,
## # colony_reno_pct <chr>
Apply the following dplyr verbs to your data
Filter rows
filter(beesdata, year == 2013, state == "New Hampshire")
## # A tibble: 0 × 10
## # ℹ 10 variables: year <dbl>, months <chr>, state <chr>, colony_n <dbl>,
## # colony_max <chr>, colony_lost <dbl>, colony_lost_pct <dbl>,
## # colony_added <chr>, colony_reno <chr>, colony_reno_pct <chr>
Arrange rows
arrange(beesdata, desc(colony_lost_pct), desc(year))
## # A tibble: 1,222 × 10
## year months state colony_n colony_max colony_lost colony_lost_pct
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 2018 January-March New M… 7500 7500 3900 52
## 2 2020 April-June Alaba… 8500 8500 4100 48
## 3 2016 January-March Oklah… 6000 6000 2900 48
## 4 2015 January-March Ohio 18000 22000 10500 48
## 5 2017 January-March Massa… 3300 3300 1500 45
## 6 2015 January-March Maryl… 7500 10000 4100 41
## 7 2015 January-March Illin… 6000 10500 4200 40
## 8 2015 October-December Kansas 8500 8500 3400 40
## 9 2015 January-March Kentu… 7500 10500 4100 39
## 10 2018 October-December New M… 8000 8000 3000 38
## # ℹ 1,212 more rows
## # ℹ 3 more variables: colony_added <chr>, colony_reno <chr>,
## # colony_reno_pct <chr>
Select columns
select(beesdata, year, months, state, colony_lost)
## # A tibble: 1,222 × 4
## year months state colony_lost
## <dbl> <chr> <chr> <dbl>
## 1 2015 January-March Alabama 1800
## 2 2015 January-March Arizona 4600
## 3 2015 January-March Arkansas 1500
## 4 2015 January-March California 255000
## 5 2015 January-March Colorado 1500
## 6 2015 January-March Connecticut 870
## 7 2015 January-March Florida 42000
## 8 2015 January-March Georgia 14500
## 9 2015 January-March Hawaii 380
## 10 2015 January-March Idaho 3700
## # ℹ 1,212 more rows
select(beesdata, year, months, state, contains("pct"))
## # A tibble: 1,222 × 5
## year months state colony_lost_pct colony_reno_pct
## <dbl> <chr> <chr> <dbl> <chr>
## 1 2015 January-March Alabama 26 4
## 2 2015 January-March Arizona 13 6
## 3 2015 January-March Arkansas 11 1
## 4 2015 January-March California 15 7
## 5 2015 January-March Colorado 12 1
## 6 2015 January-March Connecticut 22 NA
## 7 2015 January-March Florida 13 8
## 8 2015 January-March Georgia 14 9
## 9 2015 January-March Hawaii 4 7
## 10 2015 January-March Idaho 4 9
## # ℹ 1,212 more rows
Add columns
mutate(beesdata,
colony_after_loss = colony_n - colony_lost) %>%
select(year, state, colony_after_loss)
## # A tibble: 1,222 × 3
## year state colony_after_loss
## <dbl> <chr> <dbl>
## 1 2015 Alabama 5200
## 2 2015 Arizona 30400
## 3 2015 Arkansas 11500
## 4 2015 California 1185000
## 5 2015 Colorado 2000
## 6 2015 Connecticut 3030
## 7 2015 Florida 263000
## 8 2015 Georgia 89500
## 9 2015 Hawaii 10120
## 10 2015 Idaho 77300
## # ℹ 1,212 more rows
Summarize by groups
beesdata %>%
group_by(state) %>%
summarise(avg_loss = mean(colony_lost_pct, na.rm = TRUE)) %>%
arrange(avg_loss)
## # A tibble: 47 × 2
## state avg_loss
## <chr> <dbl>
## 1 New Jersey 5.28
## 2 Hawaii 5.46
## 3 Vermont 5.54
## 4 Montana 6.48
## 5 Connecticut 7.2
## 6 North Dakota 7.33
## 7 Oregon 7.48
## 8 Louisiana 7.52
## 9 South Dakota 8
## 10 Maine 8.52
## # ℹ 37 more rows