notes1

Import and check data

 library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the species records. The path is relative to the working directory,
# so change it to match wherever your copy of the file is saved.
edidiv <- read.csv(file = "edidiv.csv")

# Preview the first six rows of the data frame
head(edidiv, n = 6L)

##                      organisationName gridReference year         taxonName
## 1 Joint Nature Conservation Committee      NT265775 2000    Sterna hirundo
## 2 Joint Nature Conservation Committee      NT235775 2000    Sterna hirundo
## 3 Joint Nature Conservation Committee      NT235775 2000 Sterna paradisaea
## 4       British Trust for Ornithology          NT27 2000 Branta canadensis
## 5       British Trust for Ornithology          NT27 2000  Branta leucopsis
## 6     The Wildlife Information Centre         NT27S 2001     Turdus merula
##   taxonGroup
## 1       Bird
## 2       Bird
## 3       Bird
## 4       Bird
## 5       Bird
## 6       Bird

# Preview the last six rows of the data frame
tail(edidiv, n = 6L)

##                            organisationName gridReference year
## 25679                    The Mammal Society      NT278745 2016
## 25680                    The Mammal Society      NT277724 2016
## 25681                    The Mammal Society      NT266728 2016
## 25682                    The Mammal Society      NT270728 2016
## 25683                    The Mammal Society      NT257762 2016
## 25684 People's Trust for Endangered Species        NT2372 2016
##                   taxonName taxonGroup
## 25679  Sciurus carolinensis     Mammal
## 25680   Capreolus capreolus     Mammal
## 25681  Sciurus carolinensis     Mammal
## 25682 Oryctolagus cuniculus     Mammal
## 25683         Vulpes vulpes     Mammal
## 25684   Erinaceus europaeus     Mammal

# Show the structure of the data frame: the type of each column (integer,
# character, factor, ...) together with its first few values
utils::str(edidiv)

## 'data.frame':    25684 obs. of  5 variables:
##  $ organisationName: chr  "Joint Nature Conservation Committee" "Joint Nature Conservation Committee" "Joint Nature Conservation Committee" "British Trust for Ornithology" ...
##  $ gridReference   : chr  "NT265775" "NT235775" "NT235775" "NT27" ...
##  $ year            : int  2000 2000 2000 2000 2000 2001 2001 2001 2001 2001 ...
##  $ taxonName       : chr  "Sterna hirundo" "Sterna hirundo" "Sterna paradisaea" "Branta canadensis" ...
##  $ taxonGroup      : chr  "Bird" "Bird" "Bird" "Bird" ...

# Preview the first six values of the taxonGroup column only
head(edidiv[["taxonGroup"]], n = 6L)

## [1] "Bird" "Bird" "Bird" "Bird" "Bird" "Bird"

# Check the variable type: it is read in as character, but we want a factor
class(edidiv[["taxonGroup"]])

## [1] "character"

Set Classes

# Convert taxonGroup from character to factor so that R treats it as a
# categorical variable, then confirm the new class
edidiv[["taxonGroup"]] <- as.factor(edidiv[["taxonGroup"]])
class(edidiv[["taxonGroup"]])

## [1] "factor"

# More exploration
# Number of rows followed by number of columns
c(nrow(edidiv), ncol(edidiv))

## [1] 25684     5

# Per-column summary of the whole data frame
summary(object = edidiv)

##  organisationName   gridReference           year       taxonName        
##  Length:25684       Length:25684       Min.   :2000   Length:25684      
##  Class :character   Class :character   1st Qu.:2006   Class :character  
##  Mode  :character   Mode  :character   Median :2009   Mode  :character  
##                                        Mean   :2009                     
##                                        3rd Qu.:2011                     
##                                        Max.   :2016                     
##                                                                         
##             taxonGroup  
##  Butterfly       :9670  
##  Bird            :7366  
##  Flowering.Plants:2625  
##  Mollusc         :2226  
##  Hymenopteran    :1391  
##  Mammal          : 960  
##  (Other)         :1446

# Summary of a single column: for a factor this is the record count per level
summary(object = edidiv[["taxonGroup"]])

##           Beetle             Bird        Butterfly        Dragonfly 
##              426             7366             9670              421 
## Flowering.Plants           Fungus     Hymenopteran           Lichen 
##             2625              334             1391              140 
##        Liverwort           Mammal          Mollusc 
##              125              960             2226

How many species were recorded in each taxonomic group?

# Keep only the records belonging to one taxonomic group at a time, much like
# filtering in Excel. The data frame goes in first and the condition follows:
# taxonGroup must be EXACTLY (==) the given label. R is case-sensitive, so
# watch your spelling: "beetle" or "Beetles" would not have worked here.
Beetle <- edidiv %>% filter(taxonGroup == "Beetle")
Bird <- edidiv %>% filter(taxonGroup == "Bird")
# Create the objects for the remaining taxa in the same way. To remind
# yourself of the names and spellings, type summary(edidiv$taxonGroup)

Once you have created objects for each taxon, we can calculate species richness, i.e. the number of different species in each group.

# Species richness = number of distinct taxon names recorded in each group.
# Any object names would do; a, b, c, d... are used here for brevity.
a <- n_distinct(Beetle$taxonName)
b <- n_distinct(Bird$taxonName)

# Chain the values into one named vector; each name must line up with the
# object holding that group's richness, so mind the order
biodiv <- c(Beetle = a, Bird = b)

barplot(biodiv)

# Creating an object called "taxa" that contains all the taxa names
taxa <- c("Beetle",
          "Bird",
          "Butterfly",
          "Dragonfly",
          "Flowering.Plants",
          "Fungus",
          "Hymenopteran",
          "Lichen",
          "Liverwort",
          "Mammal",
          "Mollusc")
# Turning this object into a factor, i.e. a categorical variable
taxa_f <- factor(taxa)

# A misspelt taxon name would silently give a richness of 0, so check that
# every name actually occurs in the data before counting
stopifnot(all(taxa %in% edidiv$taxonGroup))

# Species richness (number of distinct taxon names) for every group, computed
# from the records themselves and in the same order as `taxa`, instead of
# typing the values in by hand. USE.NAMES = FALSE keeps the vector unnamed so
# the data frame below keeps its default 1..n row names.
richness <- vapply(
  taxa,
  function(taxon) {
    in_group <- edidiv$taxonGroup %in% taxon
    length(unique(edidiv$taxonName[in_group]))
  },
  integer(1),
  USE.NAMES = FALSE
)

# Creating the data frame from the two vectors
biodata <- data.frame(taxa_f, richness)

# Saving the file
write.csv(biodata, file = "biodata.csv")  # it will be saved in your working directory