# Load the assessment dataset from the local CSV export.
# NOTE(review): the path is machine-specific; adjust it when running elsewhere.
final_assessment_dataset <- read.csv("C:/Users/ruth/Downloads/final_assessment_dataset.csv")
# View() opens the interactive data viewer, so it is only called in an
# interactive session; scripted or knitted runs skip it.
if (interactive()) {
  View(final_assessment_dataset)
}
# Print each column's name, type and first few values.
str(final_assessment_dataset)
## 'data.frame':    607 obs. of  10 variables:
##  $ farm              : chr  "Benson" "Benson" "Benson" "Benson" ...
##  $ latitude          : chr  "N51:35:53" "N51:35:53" "N51:35:53" "N51:35:53" ...
##  $ longitude         : chr  "W1:05:37" "W1:05:37" "W1:05:37" "W1:05:37" ...
##  $ pct_flower        : int  10 10 10 10 25 30 30 10 10 50 ...
##  $ temp              : int  27 27 22 22 25 25 25 25 25 12 ...
##  $ variety           : chr  "Variety_1" "Variety_1" "Variety_1" "Variety_1" ...
##  $ type              : chr  "Hybrid" "Hybrid" "Hybrid" "Hybrid" ...
##  $ species           : chr  "Andrena_carantonica" "Andrena_nigroaenea" "Andrena_pubescens" "Apis_mellifera" ...
##  $ group             : chr  "Solitary bee" "Solitary bee" "Solitary bee" "Honeybee" ...
##  $ relative_abundance: num  0.5 0.5 0.5 0.5 0.5 ...
# Number of rows and columns in the dataset.
dim(final_assessment_dataset)
## [1] 607  10
# Confirm the object returned by read.csv() is a plain data.frame.
class(final_assessment_dataset)
## [1] "data.frame"
# Count the missing values (NA) in each column.
colSums(is.na(final_assessment_dataset))
##               farm           latitude          longitude         pct_flower 
##                  0                  0                  0                  1 
##               temp            variety               type            species 
##                  0                  0                  0                  0 
##              group relative_abundance 
##                  0                  0

There is one missing value, in the `pct_flower` column. We’ll impute it with the mean of the observed values in that column (simple mean imputation).

# Simple mean imputation: overwrite every missing pct_flower entry with the
# mean of the observed (non-NA) values. The logical mask from is.na() indexes
# the vector directly, so no which() is needed.
final_assessment_dataset$pct_flower[is.na(final_assessment_dataset$pct_flower)] <-
  mean(final_assessment_dataset$pct_flower, na.rm = TRUE)
# Re-count NAs per column to confirm nothing is missing any more.
colSums(is.na(final_assessment_dataset))
##               farm           latitude          longitude         pct_flower 
##                  0                  0                  0                  0 
##               temp            variety               type            species 
##                  0                  0                  0                  0 
##              group relative_abundance 
##                  0                  0

UNIVARIATE ANALYSIS

# Per-column summary: quartiles and mean for the numeric columns,
# length/class/mode for the character columns.
summary(final_assessment_dataset)
##      farm             latitude          longitude           pct_flower   
##  Length:607         Length:607         Length:607         Min.   : 0.00  
##  Class :character   Class :character   Class :character   1st Qu.: 5.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :10.00  
##                                                           Mean   :16.88  
##                                                           3rd Qu.:25.00  
##                                                           Max.   :65.00  
##       temp         variety              type             species         
##  Min.   :12.00   Length:607         Length:607         Length:607        
##  1st Qu.:16.00   Class :character   Class :character   Class :character  
##  Median :22.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :20.14                                                           
##  3rd Qu.:25.00                                                           
##  Max.   :27.00                                                           
##     group           relative_abundance
##  Length:607         Min.   :0.09091   
##  Class :character   1st Qu.:0.25000   
##  Mode  :character   Median :0.50000   
##                     Mean   :0.46674   
##                     3rd Qu.:0.65152   
##                     Max.   :1.00000
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.2     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Subset the three numeric variables by name rather than by column position,
# so the selection cannot silently pick the wrong columns if the CSV layout
# changes.
numerical_adv <- final_assessment_dataset %>%
  select(pct_flower, temp, relative_abundance)
# Summary statistics for the numeric variables only.
summary(numerical_adv)
##    pct_flower         temp       relative_abundance
##  Min.   : 0.00   Min.   :12.00   Min.   :0.09091   
##  1st Qu.: 5.00   1st Qu.:16.00   1st Qu.:0.25000   
##  Median :10.00   Median :22.00   Median :0.50000   
##  Mean   :16.88   Mean   :20.14   Mean   :0.46674   
##  3rd Qu.:25.00   3rd Qu.:25.00   3rd Qu.:0.65152   
##  Max.   :65.00   Max.   :27.00   Max.   :1.00000
# Check the column types of the numeric subset.
str(numerical_adv)
## 'data.frame':    607 obs. of  3 variables:
##  $ pct_flower        : num  10 10 10 10 25 30 30 10 10 50 ...
##  $ temp              : int  27 27 22 22 25 25 25 25 25 12 ...
##  $ relative_abundance: num  0.5 0.5 0.5 0.5 0.5 ...

Graphical analysis

For honeybees only

# Keep only the honeybee records, then narrow them to the columns used in the
# plots that follow.
honeydf <- final_assessment_dataset %>%
  filter(group == "Honeybee") %>%
  select(farm, temp, pct_flower, group)
# Print the resulting data frame.
honeydf
##           farm temp pct_flower    group
## 1       Benson   22   10.00000 Honeybee
## 2     Fulborne   25   10.00000 Honeybee
## 3     Fulborne   25   10.00000 Honeybee
## 4       Orford   14   50.00000 Honeybee
## 5       Orford   14   50.00000 Honeybee
## 6       Benson   27   10.00000 Honeybee
## 7       Benson   13   50.00000 Honeybee
## 8       Benson   13   50.00000 Honeybee
## 9     Fulborne   25   10.00000 Honeybee
## 10    Fulborne   25   10.00000 Honeybee
## 11    Fulborne   25   10.00000 Honeybee
## 12    Fulborne   25   10.00000 Honeybee
## 13    Fulborne   25   10.00000 Honeybee
## 14    Fulborne   25   10.00000 Honeybee
## 15      Orford   14   50.00000 Honeybee
## 16      Orford   14   50.00000 Honeybee
## 17    Fulborne   25   10.00000 Honeybee
## 18    Fulborne   25   10.00000 Honeybee
## 19    Fulborne   25   10.00000 Honeybee
## 20  Horncastle   22    0.00000 Honeybee
## 21  Horncastle   12   50.00000 Honeybee
## 22  Horncastle   22    0.00000 Honeybee
## 23      Benson   27   10.00000 Honeybee
## 24      Benson   27   10.00000 Honeybee
## 25      Benson   27   10.00000 Honeybee
## 26    Fulborne   25   10.00000 Honeybee
## 27    Fulborne   25   10.00000 Honeybee
## 28    Fulborne   25   10.00000 Honeybee
## 29  Horncastle   22    0.00000 Honeybee
## 30      Orford   14   50.00000 Honeybee
## 31      Benson   22   10.00000 Honeybee
## 32      Benson   22   10.00000 Honeybee
## 33      Benson   27   10.00000 Honeybee
## 34      Benson   27   10.00000 Honeybee
## 35      Benson   27   10.00000 Honeybee
## 36      Benson   27   10.00000 Honeybee
## 37    Fulborne   16    0.00000 Honeybee
## 38    Fulborne   25   10.00000 Honeybee
## 39    Fulborne   25   10.00000 Honeybee
## 40    Fulborne   25   10.00000 Honeybee
## 41    Fulborne   25   10.00000 Honeybee
## 42    Fulborne   25   10.00000 Honeybee
## 43    Fulborne   25   10.00000 Honeybee
## 44      Benson   13   50.00000 Honeybee
## 45      Benson   27   10.00000 Honeybee
## 46    Fulborne   25   10.00000 Honeybee
## 47    Fulborne   25   10.00000 Honeybee
## 48    Fulborne   25   10.00000 Honeybee
## 49    Fulborne   16   16.88119 Honeybee
## 50  Horncastle   22    0.00000 Honeybee
## 51      Orford   14   50.00000 Honeybee
## 52      Benson   27   10.00000 Honeybee
## 53      Benson   27   10.00000 Honeybee
## 54    Fulborne   25   10.00000 Honeybee
## 55    Fulborne   25   10.00000 Honeybee
## 56    Fulborne   25   10.00000 Honeybee
## 57    Fulborne   25   10.00000 Honeybee
## 58    Fulborne   25   10.00000 Honeybee
## 59    Fulborne   16    0.00000 Honeybee
## 60  Horncastle   22    0.00000 Honeybee
## 61    Fulborne   25   10.00000 Honeybee
## 62    Fulborne   25   10.00000 Honeybee
## 63    Fulborne   25   10.00000 Honeybee
## 64    Fulborne   16    0.00000 Honeybee
## 65  Horncastle   22    0.00000 Honeybee
## 66      Benson   27   10.00000 Honeybee
## 67      Benson   22   10.00000 Honeybee
## 68      Benson   27   10.00000 Honeybee
## 69      Benson   27   10.00000 Honeybee
## 70      Benson   27   10.00000 Honeybee
## 71    Fulborne   25   10.00000 Honeybee
## 72    Fulborne   25   10.00000 Honeybee
## 73    Fulborne   25   10.00000 Honeybee
## 74    Fulborne   25   10.00000 Honeybee
## 75    Fulborne   25   10.00000 Honeybee
## 76    Fulborne   25   10.00000 Honeybee
## 77    Fulborne   25   10.00000 Honeybee
## 78    Fulborne   25   10.00000 Honeybee
## 79    Fulborne   16    0.00000 Honeybee
## 80    Fulborne   16    0.00000 Honeybee
## 81    Fulborne   25   10.00000 Honeybee
## 82    Fulborne   16    0.00000 Honeybee
## 83    Fulborne   25   10.00000 Honeybee
## 84    Fulborne   25   10.00000 Honeybee
## 85      Orford   14   50.00000 Honeybee
## 86    Fulborne   25   10.00000 Honeybee
## 87    Fulborne   16    0.00000 Honeybee
## 88      Benson   27   10.00000 Honeybee
## 89    Fulborne   16    0.00000 Honeybee
## 90    Fulborne   16    0.00000 Honeybee
## 91    Fulborne   16    0.00000 Honeybee
## 92    Fulborne   25   10.00000 Honeybee
## 93    Fulborne   16    0.00000 Honeybee
## 94    Fulborne   25   10.00000 Honeybee
## 95    Fulborne   25   10.00000 Honeybee
## 96    Fulborne   25   10.00000 Honeybee
## 97    Fulborne   25   10.00000 Honeybee
## 98  Horncastle   22    0.00000 Honeybee
## 99      Orford   14   50.00000 Honeybee
## 100     Orford   14   50.00000 Honeybee
## 101   Fulborne   16    0.00000 Honeybee
## 102   Fulborne   16    0.00000 Honeybee
## 103   Fulborne   25   10.00000 Honeybee
## 104   Fulborne   25   10.00000 Honeybee
## 105   Fulborne   25   10.00000 Honeybee
## 106   Fulborne   25   10.00000 Honeybee
## 107 Horncastle   22    0.00000 Honeybee
## 108 Horncastle   12   50.00000 Honeybee
## 109     Orford   14   50.00000 Honeybee
## 110     Orford   14   50.00000 Honeybee
## 111     Benson   27   10.00000 Honeybee
## 112   Fulborne   16    0.00000 Honeybee
## 113   Fulborne   25   10.00000 Honeybee
## 114   Fulborne   25   10.00000 Honeybee
## 115   Fulborne   25   10.00000 Honeybee
## 116   Fulborne   25   10.00000 Honeybee
## 117   Fulborne   16    0.00000 Honeybee
## 118   Fulborne   16    0.00000 Honeybee
## 119 Horncastle   22    0.00000 Honeybee
## 120 Horncastle   22    0.00000 Honeybee
## 121 Horncastle   22    0.00000 Honeybee
## 122 Horncastle   12   50.00000 Honeybee
## 123     Benson   27   10.00000 Honeybee
## 124   Fulborne   25   10.00000 Honeybee
## 125   Fulborne   25   10.00000 Honeybee
## 126   Fulborne   25   10.00000 Honeybee
## 127   Fulborne   25   10.00000 Honeybee
## 128   Fulborne   25   10.00000 Honeybee
## 129 Horncastle   12   50.00000 Honeybee
## 130 Horncastle   12   50.00000 Honeybee
## 131     Benson   27   10.00000 Honeybee
## 132     Benson   22   10.00000 Honeybee
## 133   Fulborne   25   10.00000 Honeybee
## 134   Fulborne   25   10.00000 Honeybee
## 135   Fulborne   25   10.00000 Honeybee
## 136   Fulborne   25   10.00000 Honeybee
## 137   Fulborne   25   10.00000 Honeybee
## 138   Fulborne   16    0.00000 Honeybee
## 139   Fulborne   16    0.00000 Honeybee
## 140   Fulborne   16    0.00000 Honeybee
## 141     Orford   16    5.00000 Honeybee
## 142     Benson   27   10.00000 Honeybee
## 143     Benson   22   10.00000 Honeybee
## 144   Fulborne   25   10.00000 Honeybee
## 145   Fulborne   25   10.00000 Honeybee
## 146   Fulborne   25   10.00000 Honeybee
## 147   Fulborne   25   10.00000 Honeybee
## 148   Fulborne   25   10.00000 Honeybee
## 149 Horncastle   22    0.00000 Honeybee
## 150 Horncastle   12   50.00000 Honeybee
## 151     Orford   14   50.00000 Honeybee
## 152     Orford   14   50.00000 Honeybee
## 153     Benson   27   10.00000 Honeybee
## 154     Benson   27   10.00000 Honeybee
## 155     Benson   22   10.00000 Honeybee
## 156     Benson   27   10.00000 Honeybee
## 157   Fulborne   25   10.00000 Honeybee
## 158   Fulborne   25   10.00000 Honeybee
## 159   Fulborne   25   10.00000 Honeybee
## 160   Fulborne   16    0.00000 Honeybee
## 161   Fulborne   25   10.00000 Honeybee
## 162   Fulborne   25   10.00000 Honeybee
## 163   Fulborne   25   10.00000 Honeybee
## 164   Fulborne   25   10.00000 Honeybee
## 165   Fulborne   25   10.00000 Honeybee
## 166   Fulborne   25   10.00000 Honeybee
## 167   Fulborne   25   10.00000 Honeybee
## 168   Fulborne   25   10.00000 Honeybee
## 169   Fulborne   25   10.00000 Honeybee
## 170 Horncastle   22    0.00000 Honeybee
## 171 Horncastle   22    0.00000 Honeybee
## 172 Horncastle   22    0.00000 Honeybee
## 173 Horncastle   22    0.00000 Honeybee
## 174 Horncastle   22    0.00000 Honeybee
## 175 Horncastle   22    0.00000 Honeybee
## 176 Horncastle   22    0.00000 Honeybee
## 177     Orford   14   50.00000 Honeybee
## 178   Fulborne   25   10.00000 Honeybee
## 179   Fulborne   25   10.00000 Honeybee
## 180   Fulborne   25   10.00000 Honeybee
## 181 Horncastle   22    0.00000 Honeybee
## 182 Horncastle   22    0.00000 Honeybee
## 183 Horncastle   22    0.00000 Honeybee
## 184 Horncastle   22    0.00000 Honeybee
## 185 Horncastle   22    0.00000 Honeybee
library(ggplot2)

# Dodged bar chart of honeybee record counts, one bar per farm.
# preserve = "single" is passed to position_dodge() to control bar widths.
ggplot(data = honeydf, mapping = aes(x = group, fill = farm)) +
  geom_bar(position = position_dodge(preserve = "single"))

# Scatter plot of flower percentage against temperature for the honeybee rows.
# `group` is deliberately not mapped to `fill`: the default point shape has no
# fill, and honeydf holds a single group, so the mapping would only add a
# one-entry legend.
# NOTE(review): many rows share the same (temp, pct_flower) pair, so points
# overlap heavily; consider geom_count() or geom_jitter() if counts matter.
ggplot(honeydf, aes(x = temp, y = pct_flower)) +
  geom_point(color = "cornflowerblue", size = 2, alpha = 0.8) +
  labs(
    y = "flower percentage",
    x = "temp",
    title = "Honeybee flower percentage vs temperature"
  )

# Base-graphics overview of all honeydf columns at once.
# NOTE(review): farm and group are character columns, and group is constant
# after the Honeybee filter, so confirm this plot is still informative.
plot(honeydf)

Select the numeric variables

# Keep only the numeric columns. select(where(...)) replaces the superseded
# select_if() and returns the same columns.
df <- dplyr::select(final_assessment_dataset, where(is.numeric))

# Calculate the pairwise correlations, using only rows with no missing values,
# and print them rounded to two decimals.
r <- cor(df, use = "complete.obs")
round(r, 2)
##                    pct_flower  temp relative_abundance
## pct_flower               1.00 -0.20               0.05
## temp                    -0.20  1.00               0.07
## relative_abundance       0.05  0.07               1.00
library(ggplot2)
library(ggcorrplot)
# Heat map of the correlation matrix `r`, with the coefficients printed in the
# tiles and variables ordered by hierarchical clustering.
# The outline argument is spelled out in full (`outline.color`) instead of
# relying on partial matching of `outline.col`.
ggcorrplot(
  r,
  hc.order = TRUE,
  type = "full",
  lab = TRUE,
  outline.color = "white",
  ggtheme = ggplot2::theme_gray
)

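None of the numeric variables are strongly correlated: the largest coefficient is a weak negative correlation between pct_flower and temp (r = -0.20), and the remaining coefficients are close to zero.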