Taller 1 - R Basics

Iniciamos cargando la librería “dplyr” y verificando la dirección de nuestra carpeta de trabajo.

# Coding Club Workshop 1 - R Basics
# Learning how to import and explore data, and make graphs about Edinburgh's biodiversity
# Written by Andrea Galindo 08/03/2020 National University of Colombia

library(dplyr)

## Warning: package 'dplyr' was built under R version 3.6.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Print the current working directory (where relative file paths are resolved)
getwd()

## [1] "D:/Mis documentos/RStudio Works"

Seguimos con la importación de la base de datos a trabajar y uso de comandos para facilitar su identificación.

# Import the Edinburgh biodiversity records. The path is relative to the
# working directory reported by getwd(), so the script is not tied to the
# drive layout of a single machine.
edidiv <- read.csv("edidiv_.csv")

# Displays the first few rows
head(edidiv)

# Displays the last rows
tail(edidiv)

# Displays the structure of the data frame: every column with its type
str(edidiv)

## 'data.frame':    25684 obs. of  5 variables:
##  $ organisationName: Factor w/ 28 levels "BATS & The Millennium Link",..: 14 14 14 8 8 28 28 28 28 28 ...
##  $ gridReference   : Factor w/ 1938 levels "NT200701","NT200712",..: 1314 569 569 1412 1412 1671 1671 1671 1671 1671 ...
##  $ year            : int  2000 2000 2000 2000 2000 2001 2001 2001 2001 2001 ...
##  $ taxonName       : Factor w/ 1275 levels "Acarospora fuscata",..: 1126 1126 1127 192 193 1202 365 977 472 947 ...
##  $ taxonGroup      : Factor w/ 11 levels "Beetle","Bird",..: 2 2 2 2 2 2 2 2 2 2 ...

# Tells you whether the variables are continuous, integers, categorical or characters

Los siguientes son comandos de exploración de los datos.

# First few values of the taxonGroup column only
head(edidiv[["taxonGroup"]])

## [1] Bird Bird Bird Bird Bird Bird
## 11 Levels: Beetle Bird Butterfly Dragonfly Flowering.Plants ... Mollusc

# Reports the class of this column. Under R < 4.0 read.csv() already converts
# strings to factors (hence "factor" below); under R >= 4.0 it stays character
# and still has to be converted to a factor.
class(edidiv[["taxonGroup"]])

## [1] "factor"

# Make sure taxonGroup is a factor (a categorical variable); as.factor()
# returns the column unchanged when it already is one
edidiv[["taxonGroup"]] <- as.factor(edidiv[["taxonGroup"]])

# More exploration
# Number of rows followed by number of columns
dim(edidiv)

## [1] 25684     5

# Column-by-column summary of the whole data frame
summary(object = edidiv)

##                                              organisationName gridReference  
##  Biological Records Centre                           :6744    NT2673 : 2741  
##  RSPB                                                :5809    NT2773 : 2031  
##  Butterfly Conservation                              :3000    NT2873 : 1247  
##  Scottish Wildlife Trust                             :2070    NT2570 : 1001  
##  Conchological Society of Great Britain &amp; Ireland:1998    NT27   :  888  
##  The Wildlife Information Centre                     :1860    NT2871 :  767  
##  (Other)                                             :4203    (Other):17009  
##       year                      taxonName                taxonGroup  
##  Min.   :2000   Maniola jurtina      : 1710   Butterfly       :9670  
##  1st Qu.:2006   Aphantopus hyperantus: 1468   Bird            :7366  
##  Median :2009   Turdus merula        : 1112   Flowering.Plants:2625  
##  Mean   :2009   Lycaena phlaeas      :  972   Mollusc         :2226  
##  3rd Qu.:2011   Aglais urticae       :  959   Hymenopteran    :1391  
##  Max.   :2016   Aglais io            :  720   Mammal          : 960  
##                 (Other)              :18743   (Other)         :1446

# Summary of one particular variable (column): record counts per taxon group
summary(edidiv[["taxonGroup"]])

##           Beetle             Bird        Butterfly        Dragonfly 
##              426             7366             9670              421 
## Flowering.Plants           Fungus     Hymenopteran           Lichen 
##             2625              334             1391              140 
##        Liverwort           Mammal          Mollusc 
##              125              960             2226

Calcular la riqueza de especies

Hacemos uso de la función filter() para dividir la base de datos en un objeto por cada grupo taxonómico:

# One data frame per taxonomic group, each holding only that group's records
Beetle <- edidiv %>% filter(taxonGroup == "Beetle")
Bird <- edidiv %>% filter(taxonGroup == "Bird")
Butterfly <- edidiv %>% filter(taxonGroup == "Butterfly")
Dragonfly <- edidiv %>% filter(taxonGroup == "Dragonfly")
Flowering.Plants <- edidiv %>% filter(taxonGroup == "Flowering.Plants")
Fungus <- edidiv %>% filter(taxonGroup == "Fungus")
Hymenopteran <- edidiv %>% filter(taxonGroup == "Hymenopteran")
Lichen <- edidiv %>% filter(taxonGroup == "Lichen")
Liverwort <- edidiv %>% filter(taxonGroup == "Liverwort")
Mammal <- edidiv %>% filter(taxonGroup == "Mammal")
Mollusc <- edidiv %>% filter(taxonGroup == "Mollusc")

Luego utilizamos las siguientes funciones juntas: unique(), que identifica las diferentes especies, y length(), que realiza su conteo:

# Species richness per group: the number of distinct taxon names in each subset.
# The single-letter objects follow the alphabetical order of the groups.
a <- length(unique(Beetle[["taxonName"]]))
b <- length(unique(Bird[["taxonName"]]))
c <- length(unique(Butterfly[["taxonName"]]))
d <- length(unique(Dragonfly[["taxonName"]]))
e <- length(unique(Flowering.Plants[["taxonName"]]))
f <- length(unique(Fungus[["taxonName"]]))
g <- length(unique(Hymenopteran[["taxonName"]]))
h <- length(unique(Lichen[["taxonName"]]))
i <- length(unique(Liverwort[["taxonName"]]))
j <- length(unique(Mammal[["taxonName"]]))
k <- length(unique(Mollusc[["taxonName"]]))

Creación de un vector

Hacemos esto usando la función c(), que significa concatenar. También podemos agregar etiquetas con la función names(), para que cada valor quede identificado con el nombre de su taxón:

# Chain all the richness values into one vector and label each value with its
# taxon; the order of the objects must match the order of the labels
biodiv <- c(
  Beetle = a,
  Bird = b,
  Butterfly = c,
  Dragonfly = d,
  Flowering.Plants = e,
  Fungus = f,
  Hymenopteran = g,
  Lichen = h,
  Liverwort = i,
  Mammal = j,
  Mollusc = k
)

Para visualizar la riqueza de especies en un diagrama usamos la función barplot():

# One bar per taxon, with bar height equal to its species richness
barplot(height = biodiv)

Si queremos guardar el diagrama como un archivo de imagen, abrimos un dispositivo gráfico con png() antes de graficar y lo cerramos con dev.off():

# Open a PNG graphics device so the next plot is written to barplot.png
# (see ?png to customise the size and resolution of the image)
png(filename = "barplot.png", width = 1600, height = 600)
barplot(
  height = biodiv,
  xlab = "Taxa",
  ylab = "Number of species",
  ylim = c(0, 600),
  cex.names = 1.5,
  cex.axis = 1.5,
  cex.lab = 1.5
)
# Close the device so the file is finished
dev.off()

## png 
##   2

Creación de un marco de datos

Utilizaremos la función data.frame(), pero primero crearemos un objeto que contenga los nombres de todos los taxones (una columna) y otro objeto con todos los valores para la riqueza de especies de cada taxón (otra columna).

# Names of every taxonomic group, in the same order as the richness values
taxa <- c(
  "Beetle", "Bird", "Butterfly", "Dragonfly", "Flowering.Plants", "Fungus",
  "Hymenopteran", "Lichen", "Liverwort", "Mammal", "Mollusc"
)
# Categorical (factor) version of the taxa names
taxa_f <- factor(taxa)

# Species richness of each group, gathered into a single vector
richness <- c(a, b, c, d, e, f, g, h, i, j, k)

# Two-column data frame: the taxon (factor) and its richness
biodata <- data.frame(taxa_f = taxa_f, richness = richness)

# Write the table to biodata.csv in the working directory
write.csv(x = biodata, file = "biodata.csv")

Para graficar la información anterior y guardarla en un archivo de imagen usamos:

 # Plot the richness column of the data frame into barplot2.png, labelling
 # each bar with its taxon name
 png(filename = "barplot2.png", width = 1600, height = 600)
 barplot(
   height = biodata[["richness"]],
   names.arg = c(
     "Beetle", "Bird", "Butterfly", "Dragonfly", "Flowering.Plants", "Fungus",
     "Hymenopteran", "Lichen", "Liverwort", "Mammal", "Mollusc"
   ),
   xlab = "Taxa",
   ylab = "Number of species",
   ylim = c(0, 600)
 )
 # Close the device so the file is finished
 dev.off()

## png 
##   2