For my project I choose bee colony loss by year from the google drive data on annual loss of bee colonies because I am interested in conservation research. Bee are a very important part of a healthy biodiversity.
setwd("/Users/shoshigilg/Desktop/DATA 110")
df <- read.csv("bee_colony_BIP21_v2.csv")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dbplyr)
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggthemes)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(knitr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
library(ggrepel)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library("ggthemes")
library("knitr")
str(df)
## 'data.frame': 581 obs. of 10 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Year : chr "2020/21" "2020/21" "2020/21" "2020/21" ...
## $ Season : chr "Annual" "Annual" "Annual" "Annual" ...
## $ State : chr "Alaska" "District of Columbia" "Guam" "Hawaii" ...
## $ Total.Annual.Loss : num NA NA NA NA NA NA NA 56.5 64.6 52.5 ...
## $ Beekeepers : int NA NA NA NA NA NA NA 11 11 110 ...
## $ Beekeepers.Exclusive.to.State: num NA NA NA NA NA NA NA 90.9 90.9 93.6 ...
## $ Colonies : int NA NA NA NA NA NA NA 1229 513 3863 ...
## $ Colonies.Exclusive.to.State : num NA NA NA NA NA NA NA 4.8 33.2 40.6 ...
## $ State.Abbreviation : chr "AK" "DC" "GU" "HI" ...
# Use gsub() to remove some strings.
df$Total.Annual.Loss <- gsub("%", "", df$Total.Annual.Loss)
df$Beekeepers.Exclusive.to.State <- gsub("%", "", df$Beekeepers.Exclusive.to.State)
df$Colonies.Exclusive.to.State <-gsub("%","",df$Colonies.Exclusive.to.State)
df$State <- gsub("\t", "", df$State)
# convert the character type to numeric type.
df$Total.Annual.Loss <- as.numeric(df$Total.Annual.Loss)
df$Beekeepers.Exclusive.to.State <- as.numeric(df$Beekeepers.Exclusive.to.State)
df$Colonies.Exclusive.to.State <-as.numeric(df$Colonies.Exclusive.to.State)
library(knitr)
total_bee<- df %>%
group_by(Year) %>%
summarise(total_colony = sum(Colonies, na.rm= T),
total_beekeeper = sum(Beekeepers, na.rm = T),
average_loss = round(mean(Total.Annual.Loss, na.rm = T),2)
)
kable(total_bee)
| Year | total_colony | total_beekeeper | average_loss |
|---|---|---|---|
| 2010/11 | 611060 | 2639 | 40.82 |
| 2011/12 | 698560 | 3234 | 34.19 |
| 2012/13 | 3008783 | 4522 | 43.95 |
| 2013/14 | 2750706 | 6205 | 40.22 |
| 2014/15 | 2517979 | 5139 | 44.67 |
| 2015/16 | 2341670 | 4976 | 42.40 |
| 2016/17 | 1640309 | 4324 | 41.46 |
| 2017/18 | 706208 | 4113 | 47.40 |
| 2018/19 | 1386504 | 3911 | 42.25 |
| 2019/20 | 1260420 | 2861 | 44.38 |
| 2020/21 | 446024 | 2895 | 50.95 |
mean(total_bee$average_loss)
## [1] 42.97182
There has been an average 43% loss of honeybees in the U.S during 11 years.
p1<- ggplot(data = total_bee, aes(x=Year, y=average_loss)) +
labs(title= "Annual Loss Throughout 2010 - 2021",
x= "Year",
y= "Annual Loss") +
geom_col(aes(fill= average_loss))
p1
ggplot(df, aes(x= Year, y= Total.Annual.Loss, fill = "Year")) +
geom_boxplot( fill = "7") +
labs(x = "Year", y = "Total Annual Loss (%)", title ="Summary Statistics of Bee Loss by Year") +
theme(legend.position = "none") +
stat_summary(fun = mean, shape = 4, color = "darkgrey")
## Warning: Removed 58 rows containing non-finite values (stat_boxplot).
## Warning: Removed 58 rows containing non-finite values (stat_summary).
## Warning: Removed 11 rows containing missing values (geom_segment).
From the Boxplot, we can see the average, IQR, median values, and outliers of the annual honey bee loss rate. In 2020/21, both the average and median values go above 50%.
Bees and other pollinators face increasing risks to their survival, threatening foods such as apples, blueberries and coffee worth hundreds of billions of dollars a year. Pesticides, loss of habitats to farms and cities, disease and climate change were among threats to about 20,000 species of bees as well as creatures such as birds, butterflies, beetles and bats that fertilize flowers by spreading pollen.