Introduction

General descriptive statistics for 2015 rare data collected by Ohio State.

Distribution

The git repo holding this code is available at: https://github.com/puruckertom/rare_pollen

Computational environment

Version and installed libraries.

verbose_output = TRUE

if(verbose_output){
  print(Sys.info()[4])
  R.Version()$version.string
}
##          nodename 
## "DZ2626UTPURUCKE"
## [1] "R version 3.3.2 (2016-10-31)"
#check to see if needed packages are installed
list.of.packages <- c("ggplot2", "dplyr")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
#load relevant packages
library(MASS)
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.3.3
if(verbose_output){
  print("list of loaded packages: ")
  print((.packages()))
}
## [1] "list of loaded packages: "
##  [1] "reshape2"  "ggplot2"   "dplyr"     "MASS"      "stats"    
##  [6] "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [11] "base"

Analytical data

Original file with observational data: https://github.com/puruckertom/rare_pollen/blob/master/data_in/rare_data_osu_2015.csv

##################
#the data sets
##################
#import raw data - everything
file.exists(paste(osu_data,"rare_data_osu_2015.csv",sep=""))
## [1] TRUE
osu_obs <- read.table(paste(osu_data,"rare_data_osu_2015.csv",sep=""), header = TRUE, sep = ",")
str(osu_obs)
## 'data.frame':    540 obs. of  18 variables:
##  $ original_order: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ osu_run       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ sample_id     : Factor w/ 540 levels "DBT_1","DBT_10",..: 101 112 123 134 145 156 167 178 189 102 ...
##  $ date          : Factor w/ 46 levels "04/27/2015","04/28/2015",..: 3 5 3 3 3 4 3 3 3 4 ...
##  $ site_code     : Factor w/ 36 levels "BG","BG502","BG505",..: 1 5 12 17 20 21 22 25 28 34 ...
##  $ site_abbrv    : Factor w/ 11 levels "BG","DS","FSR",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ site          : Factor w/ 11 levels "Badger","Don Scott",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ initial_weight: num  3 3 3 3 3 1 3 3 3 1 ...
##  $ final_weight  : num  1.99 2 2 1.98 2 ...
##  $ units         : Factor w/ 1 level "ng/g": 1 1 1 1 1 1 1 1 1 1 ...
##  $ media         : Factor w/ 10 levels "dead_bee_traps",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ dinotefuran   : Factor w/ 85 levels "< 0.029","< 0.040",..: 2 2 2 46 2 2 2 2 2 45 ...
##  $ nitenpyram    : Factor w/ 108 levels "< 0.061","< 0.078",..: 38 47 9 30 9 9 9 9 55 9 ...
##  $ thiamethoxam  : Factor w/ 239 levels "< 0.028","< 0.057",..: 5 175 5 5 135 5 67 5 143 223 ...
##  $ clothianidin  : Factor w/ 221 levels "< 0.373","< 0.402",..: 115 198 92 15 114 3 119 3 27 180 ...
##  $ imidacloprid  : Factor w/ 256 levels "< 0.026","< 0.032",..: 5 61 54 5 215 5 5 200 5 248 ...
##  $ acetamiprid   : Factor w/ 212 levels "< 0.027","< 0.029",..: 5 126 55 5 101 95 116 5 5 147 ...
##  $ thiacloprid   : Factor w/ 284 levels "< 0.024","< 0.031",..: 186 28 88 189 5 177 151 187 5 217 ...
#melt to long format
keepers <- colnames(osu_obs)[1:11] #through media column
osu_concs <- melt(osu_obs, id.vars = keepers, variable.name = "neonic", value.name = "conc")
## Warning: attributes are not identical across measure variables; they will
## be dropped
dim(osu_concs)
## [1] 3780   13
colnames(osu_concs)
##  [1] "original_order" "osu_run"        "sample_id"      "date"          
##  [5] "site_code"      "site_abbrv"     "site"           "initial_weight"
##  [9] "final_weight"   "units"          "media"          "neonic"        
## [13] "conc"
#add detected field and numerical concentration
#osu_concs$conc
osu_concs$detected <- ifelse(grepl("<", osu_concs$conc),0,1)
osu_concs$conc2 <- as.numeric(gsub("< ", "", osu_concs$conc))
#View(osu_concs)

Summary statistics

str(osu_concs)
## 'data.frame':    3780 obs. of  15 variables:
##  $ original_order: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ osu_run       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ sample_id     : Factor w/ 540 levels "DBT_1","DBT_10",..: 101 112 123 134 145 156 167 178 189 102 ...
##  $ date          : Factor w/ 46 levels "04/27/2015","04/28/2015",..: 3 5 3 3 3 4 3 3 3 4 ...
##  $ site_code     : Factor w/ 36 levels "BG","BG502","BG505",..: 1 5 12 17 20 21 22 25 28 34 ...
##  $ site_abbrv    : Factor w/ 11 levels "BG","DS","FSR",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ site          : Factor w/ 11 levels "Badger","Don Scott",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ initial_weight: num  3 3 3 3 3 1 3 3 3 1 ...
##  $ final_weight  : num  1.99 2 2 1.98 2 ...
##  $ units         : Factor w/ 1 level "ng/g": 1 1 1 1 1 1 1 1 1 1 ...
##  $ media         : Factor w/ 10 levels "dead_bee_traps",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ neonic        : Factor w/ 7 levels "dinotefuran",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ conc          : chr  "< 0.040" "< 0.040" "< 0.040" "0.976" ...
##  $ detected      : num  0 0 0 1 0 0 0 0 0 1 ...
##  $ conc2         : num  0.04 0.04 0.04 0.976 0.04 0.04 0.04 0.04 0.04 0.925 ...
#aggregate(osu_concs$conc2, list(osu_concs$neonic), mean)
summary(osu_concs)
##  original_order     osu_run         sample_id            date     
##  Min.   :  1.0   Min.   :  1.00   DBT_1  :   7   05/05/2015: 203  
##  1st Qu.:135.8   1st Qu.: 34.00   DBT_10 :   7   04/29/2015: 154  
##  Median :270.5   Median : 68.00   DBT_100:   7   9/17/2015 : 147  
##  Mean   :270.5   Mean   : 79.59   DBT_11 :   7   05/02/2015: 140  
##  3rd Qu.:405.2   3rd Qu.:106.00   DBT_12 :   7   05/08/2015: 140  
##  Max.   :540.0   Max.   :231.00   DBT_13 :   7   05/11/2015: 140  
##                                   (Other):3738   (Other)   :2856  
##    site_code      site_abbrv         site     initial_weight  
##  DS     : 252   DS     :714   Don Scott:714   Min.   : 0.500  
##  HR     : 252   TV     :546   Three Vs :546   1st Qu.: 3.000  
##  TV     : 252   FSR    :427   FSR      :427   Median : 3.400  
##  BG     : 245   HR     :399   Honeyrun :399   Mean   : 4.420  
##  IB     : 245   BG     :392   Badger   :392   3rd Qu.: 4.925  
##  MM     : 245   MM     :357   Moorman  :357   Max.   :16.600  
##  (Other):2289   (Other):945   (Other)  :945                   
##   final_weight     units                     media              neonic   
##  Min.   :0.2083   ng/g:3780   dead_bee_traps    :700   dinotefuran :540  
##  1st Qu.:1.4212               nuc_box_pollen    :700   nitenpyram  :540  
##  Median :1.9900               field_pollen      :658   thiamethoxam:540  
##  Mean   :1.9645               nuc_box_nurse_bees:651   clothianidin:540  
##  3rd Qu.:2.0725               nuc_box_larvae    :266   imidacloprid:540  
##  Max.   :5.9800               in_hive_bee_bread :196   acetamiprid :540  
##                               (Other)           :609   thiacloprid :540  
##      conc              detected          conc2         
##  Length:3780        Min.   :0.0000   Min.   :   0.008  
##  Class :character   1st Qu.:0.0000   1st Qu.:   0.122  
##  Mode  :character   Median :0.0000   Median :   0.419  
##                     Mean   :0.3712   Mean   :   4.851  
##                     3rd Qu.:1.0000   3rd Qu.:   2.253  
##                     Max.   :1.0000   Max.   :1874.970  
##                                      NA's   :14
neonics <- unique(osu_concs$neonic)
medias <- unique(osu_concs$media)
sites <- unique(osu_concs$site)
site_abbrvs <- unique(osu_concs$site_abbrv)
plot_colors <- c("aquamarine4", "bisque", "blue3", "brown", "chartreuse4",
                 "cornflowerblue", "darkgoldenrod3", "darkkhaki", "darkmagenta", "darkorange1")
for(i in 1:10){ #10
  selected_media <- medias[i]
  hist_medias <- which(osu_concs$media==selected_media) #7
  for(j in 1:7){
    selected_neonic <- neonics[j]
    hist_neonics <- which(osu_concs$neonic==selected_neonic)
    hist_intersect <- intersect(hist_neonics, hist_medias)
    hist_extract <- osu_concs[hist_intersect,]
    plot_title = paste(selected_neonic, "in", selected_media)
    n_data <- dim(hist_extract)[[1]]
    n_detects <- sum(hist_extract$detected)
    min_data <- round(min(hist_extract$conc2), digits=3)
    mean_data <- round(mean(hist_extract$conc2), digits=3)
    max_data <- round(max(hist_extract$conc2), digits=3)
    x_label = paste("ng/g (min, mean, max =", min_data, ",", mean_data,",",max_data,")","\n",
                    "detection frequency =", n_detects, "/", n_data)
    #plot overall histogram for media and neonic combination
    hist(hist_extract$conc2, main = plot_title, xlab=x_label, ylab="Frequency", col=plot_colors[i])
    #plot sideways boxplot by site for each media and neonic combination
    boxplot(conc2~site_abbrv,data=hist_extract, main=plot_title, horizontal=TRUE,las=2, col=plot_colors[i])
    #plot sideways barchart of detection proportion by site for each media and neonic combination
    detfreq.df <- data.frame(site=character(),
                  Doubles=double(),
                  count=integer(),
                  Factors=double(),
                  stringsAsFactors=FALSE)
    for(k in 1:11){
      selected_site_abbrv <- site_abbrvs[k]
      which_extract_site <- which(hist_extract$site_abbrv==selected_site_abbrv)
      site_extract <- hist_extract[which_extract_site,]
      n_data_site <- dim(hist_extract)[[1]]
      n_detects_site <- sum(hist_extract$detected)
      if(n_data_site >0){
        det_prop_site <- n_detects_site/n_data_site
      }else{
        det_prop_site <- 0
      }
    }
    #plot time series by site for each media and neonic combination
  }
}