Intro:

For my project I choose bee colony loss by year from the google drive data on annual loss of bee colonies because I am interested in conservation research. Bee are a very important part of a healthy biodiversity.

Loading required libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dbplyr)
## 
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
## 
##     ident, sql
library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library(viridis)
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
## 
##     viridis_pal
library(readr)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(RColorBrewer)

Setting working directory.

setwd("~/Desktop/DATA 110/Final")
bee_colony <- read.csv("bee_colony_BIP21_v3.csv") %>%
mutate(loss = Colonies * Total.Annual.Loss / 100, 
       colonies_per_keeper = Colonies / Beekeepers)
str(bee_colony)
## 'data.frame':    581 obs. of  12 variables:
##  $ X                            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Year                         : int  2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
##  $ Season                       : chr  "Annual" "Annual" "Annual" "Annual" ...
##  $ State                        : chr  "Alaska" "District of Columbia" "Guam" "Hawaii" ...
##  $ Total.Annual.Loss            : num  NA NA NA NA NA NA NA 56.5 64.6 52.5 ...
##  $ Beekeepers                   : int  NA NA NA NA NA NA NA 11 11 110 ...
##  $ Beekeepers.Exclusive.to.State: num  NA NA NA NA NA NA NA 90.9 90.9 93.6 ...
##  $ Colonies                     : int  NA NA NA NA NA NA NA 1229 513 3863 ...
##  $ Colonies.Exclusive.to.State  : num  NA NA NA NA NA NA NA 4.8 33.2 40.6 ...
##  $ State.Abbreviation           : chr  "AK" "DC" "GU" "HI" ...
##  $ loss                         : num  NA NA NA NA NA ...
##  $ colonies_per_keeper          : num  NA NA NA NA NA ...

Data Cleaning:

bee_colony$Total.Annual.Loss <- gsub("%", "", bee_colony$Total.Annual.Loss) 

bee_colony$Colonies.Exclusive.to.State <-gsub("%","",bee_colony$Colonies.Exclusive.to.State)

bee_colony$State <- gsub("\t", "", bee_colony$State) 
bee_colony$Total.Annual.Loss <- as.numeric(bee_colony$Total.Annual.Loss)

bee_colony$Colonies.Exclusive.to.State <-as.numeric(bee_colony$Colonies.Exclusive.to.State)

Using dplyr commands to compute the total number of bee colonies the average bee loss ratio across all the states 2010 to 2021

bee_loss <- bee_colony%>%
  group_by(Year) %>%
  summarise(total_colony  = sum(Colonies, na.rm= TRUE),
            total_loss = sum(loss, na.rm= TRUE),
            average_loss = round(total_loss * 100 / total_colony,2))
round(mean(bee_loss$average_loss),2)
## [1] 38.98

There has been an average 39% loss of honeybees in the U.S during 11 years.

cols <- brewer.pal(4, "Oranges")
  highchart() %>%
  hc_add_series(data = bee_loss,
                type = "line", hcaes(x = Year,
                                     y = average_loss)) %>%
    hc_xAxis(title = list(text="Year")) %>%
    hc_yAxis(title = list(text="Annual Percentage Colony Loss")) %>%
    hc_title(text = "Bee Colony Decline") %>%
    hc_colors("Orange")
p1 <- ggplot(bee_colony, aes(x = colonies_per_keeper, y = Total.Annual.Loss)) +
  xlab("Average Number of Colonies per Beekeeper") + 
  ylab("Annual Percentage Colony Loss") +
  theme_minimal(base_size = 12) +
  geom_point(alpha = 0.5, color = "orange") + 
  geom_smooth(method= lm, formula=y~x, se = FALSE, color = "black", lty=2, size = 0.3)
  
p1<- ggplotly(p1)
## Warning: Removed 58 rows containing non-finite values (stat_smooth).
p1

Remove outlier point and re-plot

bee_colony2 <- bee_colony[bee_colony$colonies_per_keeper <= 15000,]
p2 <- ggplot(bee_colony2, aes(x = colonies_per_keeper, y = Total.Annual.Loss)) +
  xlab("Average Number of Colonies per Beekeeper") + 
  ylab("Annual Percentage Colony Loss") +
  theme_minimal(base_size = 12) +
   geom_point(alpha = 0.5, color = "orange") + 
  geom_smooth(method= lm, formula=y~x, se = FALSE, color = "black", lty=2, size = 0.3)
p2 <-ggplotly(p2)
## Warning: Removed 58 rows containing non-finite values (stat_smooth).
p2

How strong is the correlation between the ratio of beekeepers to colonies and the rate of colony loss?

cor(bee_colony2$colonies_per_keeper, bee_colony2$Total.Annual.Loss)
## [1] NA
fit1 <- lm(Total.Annual.Loss ~ colonies_per_keeper, data = bee_colony2)
summary(fit1)
## 
## Call:
## lm(formula = Total.Annual.Loss ~ colonies_per_keeper, data = bee_colony2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.617  -8.940  -1.431   7.995  43.688 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         43.3226519  0.6400823  67.683   <2e-16 ***
## colonies_per_keeper -0.0008407  0.0003709  -2.267   0.0238 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.48 on 520 degrees of freedom
##   (58 observations deleted due to missingness)
## Multiple R-squared:  0.009785,   Adjusted R-squared:  0.007881 
## F-statistic: 5.139 on 1 and 520 DF,  p-value: 0.02381

R-squared is small, so there’s not a strong correlation

Essay

Bees and other pollinators face increasing risks to their survival, threatening foods such as apples, blueberries and coffee worth hundreds of billions of dollars a year. Pesticides, loss of habitats to farms and cities, disease and climate change were among threats to about 20,000 species of bees as well as creatures such as birds, butterflies, beetles and bats that fertilize flowers by spreading pollen. Because of “a trend towards intensification of agriculture in the form of monocultures and pollination-dependent crops.” (Aizen et al., 2019) In a study conducted by (Martínez-López, Ruiz, & De la Rúa) Migratory beekeeping was found to produce several impacts on the honey bee colonies that could be related with pathogen and parasite prevalence. Which could also be effecting the bee populations conducted in this study. It especially important for civilians do their share in helping the population included in this final project is a Farmer Almanac article on how to create a bee home for traveling bees and additionally an admirable story of a young entrepreneur Mailka Ulmer who started a lemonade business called Bee Sweet is working on saving bees.