General descriptive statistics for 2015 rare data collected by Ohio State.
The git repo holding this code is available at: https://github.com/puruckertom/rare_pollen
Version and installed libraries.
verbose_output = TRUE
if(verbose_output){
print(Sys.info()[4])
R.Version()$version.string
}
## nodename
## "DZ2626UTPURUCKE"
## [1] "R version 3.3.2 (2016-10-31)"
#check to see if needed packages are installed
list.of.packages <- c("ggplot2", "dplyr")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
#load relevant packages
library(MASS)
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.3.3
if(verbose_output){
print("list of loaded packages: ")
print((.packages()))
}
## [1] "list of loaded packages: "
## [1] "reshape2" "ggplot2" "dplyr" "MASS" "stats"
## [6] "graphics" "grDevices" "utils" "datasets" "methods"
## [11] "base"
Original file with observational data: https://github.com/puruckertom/rare_pollen/blob/master/data_in/rare_data_osu_2015.csv
##################
#the data sets
##################
#import raw data - everything
file.exists(paste(osu_data,"rare_data_osu_2015.csv",sep=""))
## [1] TRUE
osu_obs <- read.table(paste(osu_data,"rare_data_osu_2015.csv",sep=""), header = TRUE, sep = ",")
str(osu_obs)
## 'data.frame': 540 obs. of 18 variables:
## $ original_order: int 1 2 3 4 5 6 7 8 9 10 ...
## $ osu_run : int 1 2 3 4 5 6 7 8 9 10 ...
## $ sample_id : Factor w/ 540 levels "DBT_1","DBT_10",..: 101 112 123 134 145 156 167 178 189 102 ...
## $ date : Factor w/ 46 levels "04/27/2015","04/28/2015",..: 3 5 3 3 3 4 3 3 3 4 ...
## $ site_code : Factor w/ 36 levels "BG","BG502","BG505",..: 1 5 12 17 20 21 22 25 28 34 ...
## $ site_abbrv : Factor w/ 11 levels "BG","DS","FSR",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ site : Factor w/ 11 levels "Badger","Don Scott",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ initial_weight: num 3 3 3 3 3 1 3 3 3 1 ...
## $ final_weight : num 1.99 2 2 1.98 2 ...
## $ units : Factor w/ 1 level "ng/g": 1 1 1 1 1 1 1 1 1 1 ...
## $ media : Factor w/ 10 levels "dead_bee_traps",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ dinotefuran : Factor w/ 85 levels "< 0.029","< 0.040",..: 2 2 2 46 2 2 2 2 2 45 ...
## $ nitenpyram : Factor w/ 108 levels "< 0.061","< 0.078",..: 38 47 9 30 9 9 9 9 55 9 ...
## $ thiamethoxam : Factor w/ 239 levels "< 0.028","< 0.057",..: 5 175 5 5 135 5 67 5 143 223 ...
## $ clothianidin : Factor w/ 221 levels "< 0.373","< 0.402",..: 115 198 92 15 114 3 119 3 27 180 ...
## $ imidacloprid : Factor w/ 256 levels "< 0.026","< 0.032",..: 5 61 54 5 215 5 5 200 5 248 ...
## $ acetamiprid : Factor w/ 212 levels "< 0.027","< 0.029",..: 5 126 55 5 101 95 116 5 5 147 ...
## $ thiacloprid : Factor w/ 284 levels "< 0.024","< 0.031",..: 186 28 88 189 5 177 151 187 5 217 ...
#melt to long format
keepers <- colnames(osu_obs)[1:11] #through media column
osu_concs <- melt(osu_obs, id.vars = keepers, variable.name = "neonic", value.name = "conc")
## Warning: attributes are not identical across measure variables; they will
## be dropped
dim(osu_concs)
## [1] 3780 13
colnames(osu_concs)
## [1] "original_order" "osu_run" "sample_id" "date"
## [5] "site_code" "site_abbrv" "site" "initial_weight"
## [9] "final_weight" "units" "media" "neonic"
## [13] "conc"
#add detected field and numerical concentration
#osu_concs$conc
osu_concs$detected <- ifelse(grepl("<", osu_concs$conc),0,1)
osu_concs$conc2 <- as.numeric(gsub("< ", "", osu_concs$conc))
#View(osu_concs)
Summary statistics
str(osu_concs)
## 'data.frame': 3780 obs. of 15 variables:
## $ original_order: int 1 2 3 4 5 6 7 8 9 10 ...
## $ osu_run : int 1 2 3 4 5 6 7 8 9 10 ...
## $ sample_id : Factor w/ 540 levels "DBT_1","DBT_10",..: 101 112 123 134 145 156 167 178 189 102 ...
## $ date : Factor w/ 46 levels "04/27/2015","04/28/2015",..: 3 5 3 3 3 4 3 3 3 4 ...
## $ site_code : Factor w/ 36 levels "BG","BG502","BG505",..: 1 5 12 17 20 21 22 25 28 34 ...
## $ site_abbrv : Factor w/ 11 levels "BG","DS","FSR",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ site : Factor w/ 11 levels "Badger","Don Scott",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ initial_weight: num 3 3 3 3 3 1 3 3 3 1 ...
## $ final_weight : num 1.99 2 2 1.98 2 ...
## $ units : Factor w/ 1 level "ng/g": 1 1 1 1 1 1 1 1 1 1 ...
## $ media : Factor w/ 10 levels "dead_bee_traps",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ neonic : Factor w/ 7 levels "dinotefuran",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ conc : chr "< 0.040" "< 0.040" "< 0.040" "0.976" ...
## $ detected : num 0 0 0 1 0 0 0 0 0 1 ...
## $ conc2 : num 0.04 0.04 0.04 0.976 0.04 0.04 0.04 0.04 0.04 0.925 ...
#aggregate(osu_concs$conc2, list(osu_concs$neonic), mean)
summary(osu_concs)
## original_order osu_run sample_id date
## Min. : 1.0 Min. : 1.00 DBT_1 : 7 05/05/2015: 203
## 1st Qu.:135.8 1st Qu.: 34.00 DBT_10 : 7 04/29/2015: 154
## Median :270.5 Median : 68.00 DBT_100: 7 9/17/2015 : 147
## Mean :270.5 Mean : 79.59 DBT_11 : 7 05/02/2015: 140
## 3rd Qu.:405.2 3rd Qu.:106.00 DBT_12 : 7 05/08/2015: 140
## Max. :540.0 Max. :231.00 DBT_13 : 7 05/11/2015: 140
## (Other):3738 (Other) :2856
## site_code site_abbrv site initial_weight
## DS : 252 DS :714 Don Scott:714 Min. : 0.500
## HR : 252 TV :546 Three Vs :546 1st Qu.: 3.000
## TV : 252 FSR :427 FSR :427 Median : 3.400
## BG : 245 HR :399 Honeyrun :399 Mean : 4.420
## IB : 245 BG :392 Badger :392 3rd Qu.: 4.925
## MM : 245 MM :357 Moorman :357 Max. :16.600
## (Other):2289 (Other):945 (Other) :945
## final_weight units media neonic
## Min. :0.2083 ng/g:3780 dead_bee_traps :700 dinotefuran :540
## 1st Qu.:1.4212 nuc_box_pollen :700 nitenpyram :540
## Median :1.9900 field_pollen :658 thiamethoxam:540
## Mean :1.9645 nuc_box_nurse_bees:651 clothianidin:540
## 3rd Qu.:2.0725 nuc_box_larvae :266 imidacloprid:540
## Max. :5.9800 in_hive_bee_bread :196 acetamiprid :540
## (Other) :609 thiacloprid :540
## conc detected conc2
## Length:3780 Min. :0.0000 Min. : 0.008
## Class :character 1st Qu.:0.0000 1st Qu.: 0.122
## Mode :character Median :0.0000 Median : 0.419
## Mean :0.3712 Mean : 4.851
## 3rd Qu.:1.0000 3rd Qu.: 2.253
## Max. :1.0000 Max. :1874.970
## NA's :14
neonics <- unique(osu_concs$neonic)
medias <- unique(osu_concs$media)
sites <- unique(osu_concs$site)
site_abbrvs <- unique(osu_concs$site_abbrv)
plot_colors <- c("aquamarine4", "bisque", "blue3", "brown", "chartreuse4",
"cornflowerblue", "darkgoldenrod3", "darkkhaki", "darkmagenta", "darkorange1")
for(i in 1:10){ #10
selected_media <- medias[i]
hist_medias <- which(osu_concs$media==selected_media) #7
for(j in 1:7){
selected_neonic <- neonics[j]
hist_neonics <- which(osu_concs$neonic==selected_neonic)
hist_intersect <- intersect(hist_neonics, hist_medias)
hist_extract <- osu_concs[hist_intersect,]
plot_title = paste(selected_neonic, "in", selected_media)
n_data <- dim(hist_extract)[[1]]
n_detects <- sum(hist_extract$detected)
min_data <- round(min(hist_extract$conc2), digits=3)
mean_data <- round(mean(hist_extract$conc2), digits=3)
max_data <- round(max(hist_extract$conc2), digits=3)
x_label = paste("ng/g (min, mean, max =", min_data, ",", mean_data,",",max_data,")","\n",
"detection frequency =", n_detects, "/", n_data)
#plot overall histogram for media and neonic combination
hist(hist_extract$conc2, main = plot_title, xlab=x_label, ylab="Frequency", col=plot_colors[i])
#plot sideways boxplot by site for each media and neonic combination
boxplot(conc2~site_abbrv,data=hist_extract, main=plot_title, horizontal=TRUE,las=2, col=plot_colors[i])
#plot sideways barchart of detection proportion by site for each media and neonic combination
detfreq.df <- data.frame(site=character(),
Doubles=double(),
count=integer(),
Factors=double(),
stringsAsFactors=FALSE)
for(k in 1:11){
selected_site_abbrv <- site_abbrvs[k]
which_extract_site <- which(hist_extract$site_abbrv==selected_site_abbrv)
site_extract <- hist_extract[which_extract_site,]
n_data_site <- dim(hist_extract)[[1]]
n_detects_site <- sum(hist_extract$detected)
if(n_data_site >0){
det_prop_site <- n_detects_site/n_data_site
}else{
det_prop_site <- 0
}
}
#plot time series by site for each media and neonic combination
}
}