elongation <- read.csv("EmpetrumElongation.csv", header = TRUE)   

Check import and preview data

head(elongation)   # first few observations
##   Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012
## 1    2   373   5.1   5.1   4.8   8.7   6.3   3.2
## 2    2   379   8.1  13.3   8.6   4.9   5.9   6.3
## 3    2   383   9.3   8.5  11.7   7.9   8.0   6.3
## 4    2   389  15.0  10.3   6.8   6.9   5.9   7.6
## 5    2   390   3.5   6.2   4.7   3.8   3.5   3.0
## 6    2   395   6.1   5.6   4.4   4.5   4.5   7.6
str(elongation)    # types of variables
## 'data.frame':    114 obs. of  8 variables:
##  $ Zone : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Indiv: int  373 379 383 389 390 395 396 408 412 421 ...
##  $ X2007: num  5.1 8.1 9.3 15 3.5 6.1 7.2 6.1 4.6 7.2 ...
##  $ X2008: num  5.1 13.3 8.5 10.3 6.2 5.6 5.9 8.1 6.7 5.8 ...
##  $ X2009: num  4.8 8.6 11.7 6.8 4.7 4.4 5.7 7.6 4.5 5.8 ...
##  $ X2010: num  8.7 4.9 7.9 6.9 3.8 4.5 5.4 6.2 2.8 5.6 ...
##  $ X2011: num  6.3 5.9 8 5.9 3.5 4.5 5.5 9.6 4.2 4.3 ...
##  $ X2012: num  3.2 6.3 6.3 7.6 3 7.6 4.7 10.1 5.2 3.4 ...

Let’s get some information out of this object!

elongation$Indiv   # prints out all the ID codes in the dataset
##   [1]  373  379  383  389  390  395  396  408  412  421  425  429  431  442  444
##  [16]  447  458  464  486  512  530  534  538  542  549  569  572  577  581  584
##  [31]  597  616  647  660  591  595  603  612  618  619  623  632  641  645  646
##  [46]  648  654  657  661  663  677  678  682    1   37   38   41   49   64   71
##  [61]   72   85   88  101  114  120  126  145  150  155  166  177  206  129  257
##  [76]  276  303  322  349  350  355  370  454  725  766  812  817  844  863  868
##  [91]  896  899  901  917  924  963  970  977  979 1000 1069 1073 1087 1109 1140
## [106] 1183 1188 1247 1286 1340 1358 1392 1477 1481
length(unique(elongation$Indiv))   # returns the number of distinct shrubs in the data
## [1] 114
# Here's how we get the value in the second row and fifth column
elongation[2,5]
## [1] 8.6
# Here's how we get all the info for row number 6
elongation[6, ]
##   Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012
## 6    2   395   6.1   5.6   4.4   4.5   4.5   7.6
# And of course you can mix it all together! 
elongation[6, ]$Indiv   # returns the value in the column Indiv for the sixth observation
## [1] 395
# (much easier calling columns by their names than figuring out where they are!) 
# Let's access the values for Individual number 603
elongation[elongation$Indiv == 603, ]
##    Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012
## 37    4   603   2.5   3.8   3.2   2.3   3.2   5.4
# Subsetting with one condition

elongation[elongation$Zone < 4, ]    # returns only the data for zones 2-3
##    Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012
## 1     2   373   5.1   5.1   4.8   8.7   6.3   3.2
## 2     2   379   8.1  13.3   8.6   4.9   5.9   6.3
## 3     2   383   9.3   8.5  11.7   7.9   8.0   6.3
## 4     2   389  15.0  10.3   6.8   6.9   5.9   7.6
## 5     2   390   3.5   6.2   4.7   3.8   3.5   3.0
## 6     2   395   6.1   5.6   4.4   4.5   4.5   7.6
## 7     2   396   7.2   5.9   5.7   5.4   5.5   4.7
## 8     2   408   6.1   8.1   7.6   6.2   9.6  10.1
## 9     2   412   4.6   6.7   4.5   2.8   4.2   5.2
## 10    2   421   7.2   5.8   5.8   5.6   4.3   3.4
## 11    2   425   6.4   8.1   7.8   6.2   6.4   7.9
## 12    2   429   8.9  11.2   8.6   8.4   8.2   6.6
## 13    2   431   3.5   5.9   5.4   4.9   3.5   4.9
## 14    2   442   5.3   9.9   9.2   6.4   5.5   5.0
## 15    2   444   8.7   5.6   5.9   5.2   6.6   7.4
## 16    3   447   8.3   6.5   8.1   7.0   7.6   8.3
## 17    3   458   5.4   8.6   6.1   5.4   7.2   9.4
## 18    3   464   9.2   9.7   7.8   5.8   7.2   6.2
## 19    3   486   9.1   6.7   6.8   6.1   8.4   7.9
## 20    3   512   9.0  10.8   7.1   5.8   8.3   6.7
## 21    3   530   9.2  11.4   4.5   3.6   6.9   5.7
## 22    3   534   7.4   6.8   7.9   3.7   5.5   6.7
## 23    3   538   8.9  12.5  10.5   6.3   7.3   7.4
## 24    3   542   5.3   6.0   5.8   8.6   6.4   7.9
## 25    3   549   4.5   5.6   5.8   4.4   7.4   4.9
## 26    3   569   4.2   6.3   6.2   6.3   6.4   6.4
## 27    3   572   7.1  10.1  10.0  10.1   9.8  13.1
## 28    3   577   6.8  12.1  11.1   8.6   6.1   8.2
## 29    3   581   6.0   6.9   6.3   3.7   5.6   5.3
## 30    3   584   5.6   6.6   5.9   4.8   6.0   6.1
## 31    3   597   8.9   7.9   6.4   7.1   8.0   9.4
## 32    3   616   9.7  10.8   8.8   6.4   9.5   8.0
## 33    3   647   8.1   6.9   6.9   4.0   7.2   7.3
## 34    3   660   6.7   5.4   6.0   4.9   6.8   6.4
elongation[elongation$Zone <= 4, ]   # returns only the data for zones 2-3-4
##    Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012
## 1     2   373   5.1   5.1   4.8   8.7   6.3   3.2
## 2     2   379   8.1  13.3   8.6   4.9   5.9   6.3
## 3     2   383   9.3   8.5  11.7   7.9   8.0   6.3
## 4     2   389  15.0  10.3   6.8   6.9   5.9   7.6
## 5     2   390   3.5   6.2   4.7   3.8   3.5   3.0
## 6     2   395   6.1   5.6   4.4   4.5   4.5   7.6
## 7     2   396   7.2   5.9   5.7   5.4   5.5   4.7
## 8     2   408   6.1   8.1   7.6   6.2   9.6  10.1
## 9     2   412   4.6   6.7   4.5   2.8   4.2   5.2
## 10    2   421   7.2   5.8   5.8   5.6   4.3   3.4
## 11    2   425   6.4   8.1   7.8   6.2   6.4   7.9
## 12    2   429   8.9  11.2   8.6   8.4   8.2   6.6
## 13    2   431   3.5   5.9   5.4   4.9   3.5   4.9
## 14    2   442   5.3   9.9   9.2   6.4   5.5   5.0
## 15    2   444   8.7   5.6   5.9   5.2   6.6   7.4
## 16    3   447   8.3   6.5   8.1   7.0   7.6   8.3
## 17    3   458   5.4   8.6   6.1   5.4   7.2   9.4
## 18    3   464   9.2   9.7   7.8   5.8   7.2   6.2
## 19    3   486   9.1   6.7   6.8   6.1   8.4   7.9
## 20    3   512   9.0  10.8   7.1   5.8   8.3   6.7
## 21    3   530   9.2  11.4   4.5   3.6   6.9   5.7
## 22    3   534   7.4   6.8   7.9   3.7   5.5   6.7
## 23    3   538   8.9  12.5  10.5   6.3   7.3   7.4
## 24    3   542   5.3   6.0   5.8   8.6   6.4   7.9
## 25    3   549   4.5   5.6   5.8   4.4   7.4   4.9
## 26    3   569   4.2   6.3   6.2   6.3   6.4   6.4
## 27    3   572   7.1  10.1  10.0  10.1   9.8  13.1
## 28    3   577   6.8  12.1  11.1   8.6   6.1   8.2
## 29    3   581   6.0   6.9   6.3   3.7   5.6   5.3
## 30    3   584   5.6   6.6   5.9   4.8   6.0   6.1
## 31    3   597   8.9   7.9   6.4   7.1   8.0   9.4
## 32    3   616   9.7  10.8   8.8   6.4   9.5   8.0
## 33    3   647   8.1   6.9   6.9   4.0   7.2   7.3
## 34    3   660   6.7   5.4   6.0   4.9   6.8   6.4
## 35    4   591   6.5  10.9   7.8   6.1  11.0   5.0
## 36    4   595   9.8   9.2   8.0   6.7  10.2  10.5
## 37    4   603   2.5   3.8   3.2   2.3   3.2   5.4
## 38    4   612   5.4   6.5   8.0   5.7   4.2   6.6
## 39    4   618   6.1   7.5   6.1   4.2   5.4   5.5
## 40    4   619   8.4  10.5   6.9   5.2  11.1   7.4
## 41    4   623   5.4   7.5   4.4   3.4   5.2   6.8
## 42    4   632   7.3   7.3   6.9   6.3   6.9   6.6
## 43    4   641   7.1  13.2   9.5   6.0   7.6   6.7
## 44    4   645   8.0  11.5   7.2   8.5   9.7   9.9
## 45    4   646  11.5   7.0   4.1   5.2   7.8   6.8
## 46    4   648  10.7   9.2   6.9   5.5   8.8   9.2
## 47    4   654   9.5  10.4   8.9   8.6  11.1  10.2
## 48    4   657   6.6  14.0  10.1   6.6   9.7   6.2
## 49    4   661   8.2   8.9   6.1   5.7  11.2   8.7
## 50    4   663   5.7  10.6   9.5   6.9   8.3   7.8
## 51    4   677   6.0   8.6   6.1   7.0   7.2   7.8
## 52    4   678   3.6   3.7   8.2   4.4   4.6   7.9
## 53    4   682   5.0   6.8   8.7   5.1   5.6   6.0
# This is completely equivalent to the last statement
elongation[!elongation$Zone >= 5, ]   # the ! means exclude
##    Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012
## 1     2   373   5.1   5.1   4.8   8.7   6.3   3.2
## 2     2   379   8.1  13.3   8.6   4.9   5.9   6.3
## 3     2   383   9.3   8.5  11.7   7.9   8.0   6.3
## 4     2   389  15.0  10.3   6.8   6.9   5.9   7.6
## 5     2   390   3.5   6.2   4.7   3.8   3.5   3.0
## 6     2   395   6.1   5.6   4.4   4.5   4.5   7.6
## 7     2   396   7.2   5.9   5.7   5.4   5.5   4.7
## 8     2   408   6.1   8.1   7.6   6.2   9.6  10.1
## 9     2   412   4.6   6.7   4.5   2.8   4.2   5.2
## 10    2   421   7.2   5.8   5.8   5.6   4.3   3.4
## 11    2   425   6.4   8.1   7.8   6.2   6.4   7.9
## 12    2   429   8.9  11.2   8.6   8.4   8.2   6.6
## 13    2   431   3.5   5.9   5.4   4.9   3.5   4.9
## 14    2   442   5.3   9.9   9.2   6.4   5.5   5.0
## 15    2   444   8.7   5.6   5.9   5.2   6.6   7.4
## 16    3   447   8.3   6.5   8.1   7.0   7.6   8.3
## 17    3   458   5.4   8.6   6.1   5.4   7.2   9.4
## 18    3   464   9.2   9.7   7.8   5.8   7.2   6.2
## 19    3   486   9.1   6.7   6.8   6.1   8.4   7.9
## 20    3   512   9.0  10.8   7.1   5.8   8.3   6.7
## 21    3   530   9.2  11.4   4.5   3.6   6.9   5.7
## 22    3   534   7.4   6.8   7.9   3.7   5.5   6.7
## 23    3   538   8.9  12.5  10.5   6.3   7.3   7.4
## 24    3   542   5.3   6.0   5.8   8.6   6.4   7.9
## 25    3   549   4.5   5.6   5.8   4.4   7.4   4.9
## 26    3   569   4.2   6.3   6.2   6.3   6.4   6.4
## 27    3   572   7.1  10.1  10.0  10.1   9.8  13.1
## 28    3   577   6.8  12.1  11.1   8.6   6.1   8.2
## 29    3   581   6.0   6.9   6.3   3.7   5.6   5.3
## 30    3   584   5.6   6.6   5.9   4.8   6.0   6.1
## 31    3   597   8.9   7.9   6.4   7.1   8.0   9.4
## 32    3   616   9.7  10.8   8.8   6.4   9.5   8.0
## 33    3   647   8.1   6.9   6.9   4.0   7.2   7.3
## 34    3   660   6.7   5.4   6.0   4.9   6.8   6.4
## 35    4   591   6.5  10.9   7.8   6.1  11.0   5.0
## 36    4   595   9.8   9.2   8.0   6.7  10.2  10.5
## 37    4   603   2.5   3.8   3.2   2.3   3.2   5.4
## 38    4   612   5.4   6.5   8.0   5.7   4.2   6.6
## 39    4   618   6.1   7.5   6.1   4.2   5.4   5.5
## 40    4   619   8.4  10.5   6.9   5.2  11.1   7.4
## 41    4   623   5.4   7.5   4.4   3.4   5.2   6.8
## 42    4   632   7.3   7.3   6.9   6.3   6.9   6.6
## 43    4   641   7.1  13.2   9.5   6.0   7.6   6.7
## 44    4   645   8.0  11.5   7.2   8.5   9.7   9.9
## 45    4   646  11.5   7.0   4.1   5.2   7.8   6.8
## 46    4   648  10.7   9.2   6.9   5.5   8.8   9.2
## 47    4   654   9.5  10.4   8.9   8.6  11.1  10.2
## 48    4   657   6.6  14.0  10.1   6.6   9.7   6.2
## 49    4   661   8.2   8.9   6.1   5.7  11.2   8.7
## 50    4   663   5.7  10.6   9.5   6.9   8.3   7.8
## 51    4   677   6.0   8.6   6.1   7.0   7.2   7.8
## 52    4   678   3.6   3.7   8.2   4.4   4.6   7.9
## 53    4   682   5.0   6.8   8.7   5.1   5.6   6.0
# Subsetting with two conditions
elongation[elongation$Zone == 2 | elongation$Zone == 7, ]    # returns only data for zones 2 and 7
##     Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012
## 1      2   373   5.1   5.1   4.8   8.7   6.3   3.2
## 2      2   379   8.1  13.3   8.6   4.9   5.9   6.3
## 3      2   383   9.3   8.5  11.7   7.9   8.0   6.3
## 4      2   389  15.0  10.3   6.8   6.9   5.9   7.6
## 5      2   390   3.5   6.2   4.7   3.8   3.5   3.0
## 6      2   395   6.1   5.6   4.4   4.5   4.5   7.6
## 7      2   396   7.2   5.9   5.7   5.4   5.5   4.7
## 8      2   408   6.1   8.1   7.6   6.2   9.6  10.1
## 9      2   412   4.6   6.7   4.5   2.8   4.2   5.2
## 10     2   421   7.2   5.8   5.8   5.6   4.3   3.4
## 11     2   425   6.4   8.1   7.8   6.2   6.4   7.9
## 12     2   429   8.9  11.2   8.6   8.4   8.2   6.6
## 13     2   431   3.5   5.9   5.4   4.9   3.5   4.9
## 14     2   442   5.3   9.9   9.2   6.4   5.5   5.0
## 15     2   444   8.7   5.6   5.9   5.2   6.6   7.4
## 95     7   924   8.1   7.8   7.1   6.8   7.0   6.6
## 96     7   963   6.8   7.2   3.7   3.5   4.0   4.5
## 97     7   970   6.2   5.2   5.9   6.2   7.7   6.3
## 98     7   977   5.5   5.3   6.4   5.7   7.7   7.9
## 99     7   979   6.6   6.3   6.3   7.7   6.6   7.1
## 100    7  1000   4.3   5.5   3.6   3.1   6.7   5.8
## 101    7  1069   6.2   3.9   4.1   3.8   5.3   7.2
## 102    7  1073   7.3   6.3   5.3   6.7   6.0   8.7
## 103    7  1087   5.9   6.7   7.4   5.1   5.3   6.2
## 104    7  1109   4.5   4.3   3.4   4.3   2.9   2.4
## 105    7  1140   4.7   3.9   5.5   5.2   5.7   3.9
## 106    7  1183   7.2   9.3   6.7   7.8   8.3   7.7
## 107    7  1188   9.4   8.9   7.0   6.9   9.5   7.8
## 108    7  1247   5.5   5.0   4.2   3.9   6.0   4.8
## 109    7  1286   7.1   9.5   7.7   5.6   8.0   8.2
## 110    7  1340   7.1   5.7   7.2   6.9   7.3   8.4
## 111    7  1358   9.0  10.0   8.6   8.5   8.3   8.3
## 112    7  1392   3.9   4.7   2.9   3.9   3.8   4.8
## 113    7  1477   5.3   5.9   6.2   4.2   5.5   6.1
## 114    7  1481   5.4   6.7   6.0   6.0   7.2   7.0
elongation[elongation$Zone == 2 & elongation$Indiv %in% c(300:400), ]    # returns data for shrubs in zone 2 whose ID numbers are between 300 and 400
##   Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012
## 1    2   373   5.1   5.1   4.8   8.7   6.3   3.2
## 2    2   379   8.1  13.3   8.6   4.9   5.9   6.3
## 3    2   383   9.3   8.5  11.7   7.9   8.0   6.3
## 4    2   389  15.0  10.3   6.8   6.9   5.9   7.6
## 5    2   390   3.5   6.2   4.7   3.8   3.5   3.0
## 6    2   395   6.1   5.6   4.4   4.5   4.5   7.6
## 7    2   396   7.2   5.9   5.7   5.4   5.5   4.7

CHANGING VARIABLE NAMES AND VALUES IN A DATA FRAME

## CHANGING VARIABLE NAMES AND VALUES IN A DATA FRAME

# Let's create a working copy of our object
elong2 <- elongation

# Now suppose you want to change the name of a column: you can use the names() function
# Used on its own, it returns a vector of the names of the columns. Used on the left side of the assign arrow, it overwrites all or some of the names to value(s) of your choice. 

names(elong2)                 # returns the names of the columns
## [1] "Zone"  "Indiv" "X2007" "X2008" "X2009" "X2010" "X2011" "X2012"
names(elong2)[1] <- "zone"    # Changing Zone to zone: we call the 1st element of the names vector using brackets, and assign it a new value
names(elong2)[2] <- "ID"      # Changing Indiv to ID: we call the 2nd element and assign it the desired value

# Now suppose there's a mistake in the data, and the value 5.1 for individual 373 in year 2008 should really be 5.7

## - option 1: you can use row and column number
elong2[1,4] <- 5.7

## - option 2: you can use logical conditions for more control
elong2[elong2$ID == 373, ]$X2008 <- 5.7   # completely equivalent to option 1

CREATING A FACTOR

 #Let's check the classes 
str(elong2)
## 'data.frame':    114 obs. of  8 variables:
##  $ zone : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ ID   : int  373 379 383 389 390 395 396 408 412 421 ...
##  $ X2007: num  5.1 8.1 9.3 15 3.5 6.1 7.2 6.1 4.6 7.2 ...
##  $ X2008: num  5.7 13.3 8.5 10.3 6.2 5.6 5.9 8.1 6.7 5.8 ...
##  $ X2009: num  4.8 8.6 11.7 6.8 4.7 4.4 5.7 7.6 4.5 5.8 ...
##  $ X2010: num  8.7 4.9 7.9 6.9 3.8 4.5 5.4 6.2 2.8 5.6 ...
##  $ X2011: num  6.3 5.9 8 5.9 3.5 4.5 5.5 9.6 4.2 4.3 ...
##  $ X2012: num  3.2 6.3 6.3 7.6 3 7.6 4.7 10.1 5.2 3.4 ...
# The zone column shows as integer data (whole numbers), but it's really a grouping factor (the zones could have been called A, B, C, etc.) Let's turn it into a factor:

elong2$zone <- as.factor(elong2$zone)        # converting and overwriting original class
str(elong2)                                  # now zone is a factor with 6 levels
## 'data.frame':    114 obs. of  8 variables:
##  $ zone : Factor w/ 6 levels "2","3","4","5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ID   : int  373 379 383 389 390 395 396 408 412 421 ...
##  $ X2007: num  5.1 8.1 9.3 15 3.5 6.1 7.2 6.1 4.6 7.2 ...
##  $ X2008: num  5.7 13.3 8.5 10.3 6.2 5.6 5.9 8.1 6.7 5.8 ...
##  $ X2009: num  4.8 8.6 11.7 6.8 4.7 4.4 5.7 7.6 4.5 5.8 ...
##  $ X2010: num  8.7 4.9 7.9 6.9 3.8 4.5 5.4 6.2 2.8 5.6 ...
##  $ X2011: num  6.3 5.9 8 5.9 3.5 4.5 5.5 9.6 4.2 4.3 ...
##  $ X2012: num  3.2 6.3 6.3 7.6 3 7.6 4.7 10.1 5.2 3.4 ...

And what if you’re not happy with the factor levels? You can see the names of the factors with the levels() function… and yes, overwrite them, too.

CHANGING A FACTOR’S LEVELS

levels(elong2$zone)  # shows the different factor levels
## [1] "2" "3" "4" "5" "6" "7"
levels(elong2$zone) <- c("A", "B", "C", "D", "E", "F")   # you can overwrite the original levels with new names

# You must make sure that you have a vector the same length as the number of factors, and pay attention to the order in which they appear!

2. What is tidy data, and how do we achieve it?

tidy datasets are arranged so that each row represents an observation and each column represents a variable.
R would be able to split the dataframe correctly, as each grouping factor has its own column.

Something not quite tidy with our previous object elongation? We have observation of the same variable, i.e. stem length, spread across multiple columns representing different years.

The gather() function from the tidyr package will let us convert this wide-format table to a tidy dataframe. We want to create a single column Year that will have years currently in the columns (2007-2012) repeated for each individual. From this, you should be able to work out that the dataframe will be six times longer than the original. We also want a column Length where all the growth data associated to each year and individual will go

library(tidyr)             # load the package


elongation_long <- gather(elongation, Year, Length,                           # in this order: data frame, key, value
                          c(X2007, X2008, X2009, X2010, X2011, X2012))        # we need to specify which columns to gather

head(elongation_long)
##   Zone Indiv  Year Length
## 1    2   373 X2007    5.1
## 2    2   379 X2007    8.1
## 3    2   383 X2007    9.3
## 4    2   389 X2007   15.0
## 5    2   390 X2007    3.5
## 6    2   395 X2007    6.1
# Here we want the lengths (value) to be gathered by year (key) 

# Let's reverse! spread() is the inverse function, allowing you to go from long to wide format
elongation_wide <- spread(elongation_long, Year, Length) 
head(elongation_wide)
##   Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012
## 1    2   373   5.1   5.1   4.8   8.7   6.3   3.2
## 2    2   379   8.1  13.3   8.6   4.9   5.9   6.3
## 3    2   383   9.3   8.5  11.7   7.9   8.0   6.3
## 4    2   389  15.0  10.3   6.8   6.9   5.9   7.6
## 5    2   390   3.5   6.2   4.7   3.8   3.5   3.0
## 6    2   395   6.1   5.6   4.4   4.5   4.5   7.6

3. Explore the most common and useful functions of dplyr

library(dplyr)              # load the package
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

3a. rename() variables

This lets you change the name(s) of a column or columns. The first argument is the data frame, the second (and third, etc.) takes the form New name = Old name.

elongation_long <- rename(elongation_long, zone = Zone, indiv = Indiv, year = Year, length = Length)     # changes the names of the columns (getting rid of capital letters) and overwriting our data frame

# As we saw earlier, the base R equivalent would have been
names(elongation_long) <- c("zone", "indiv", "year", "length")

3b. filter() rows and select() columns

These are some of the most routine functions that let you reduce your data frame to just the rows and columns you need. The filter() function works great for subsetting rows with logical operations. The select() function lets you specify which columns to keep. Note: the select() function often clashes with functions of the same name in other packages, and for that reason it is recommended to always use the notation dplyr::select() when calling it.

# FILTER OBSERVATIONS

# Let's keep observations from zones 2 and 3 only, and from years 2009 to 2011

elong_subset <- filter(elongation_long, zone %in% c(2, 3), year %in% c("X2009", "X2010", "X2011")) # you can use multiple different conditions separated by commas

# For comparison, the base R equivalent would be (not assigned to an object here):
elongation_long[elongation_long$zone %in% c(2,3) & elongation_long$year %in% c("X2009", "X2010", "X2011"), ]
##     zone indiv  year length
## 229    2   373 X2009    4.8
## 230    2   379 X2009    8.6
## 231    2   383 X2009   11.7
## 232    2   389 X2009    6.8
## 233    2   390 X2009    4.7
## 234    2   395 X2009    4.4
## 235    2   396 X2009    5.7
## 236    2   408 X2009    7.6
## 237    2   412 X2009    4.5
## 238    2   421 X2009    5.8
## 239    2   425 X2009    7.8
## 240    2   429 X2009    8.6
## 241    2   431 X2009    5.4
## 242    2   442 X2009    9.2
## 243    2   444 X2009    5.9
## 244    3   447 X2009    8.1
## 245    3   458 X2009    6.1
## 246    3   464 X2009    7.8
## 247    3   486 X2009    6.8
## 248    3   512 X2009    7.1
## 249    3   530 X2009    4.5
## 250    3   534 X2009    7.9
## 251    3   538 X2009   10.5
## 252    3   542 X2009    5.8
## 253    3   549 X2009    5.8
## 254    3   569 X2009    6.2
## 255    3   572 X2009   10.0
## 256    3   577 X2009   11.1
## 257    3   581 X2009    6.3
## 258    3   584 X2009    5.9
## 259    3   597 X2009    6.4
## 260    3   616 X2009    8.8
## 261    3   647 X2009    6.9
## 262    3   660 X2009    6.0
## 343    2   373 X2010    8.7
## 344    2   379 X2010    4.9
## 345    2   383 X2010    7.9
## 346    2   389 X2010    6.9
## 347    2   390 X2010    3.8
## 348    2   395 X2010    4.5
## 349    2   396 X2010    5.4
## 350    2   408 X2010    6.2
## 351    2   412 X2010    2.8
## 352    2   421 X2010    5.6
## 353    2   425 X2010    6.2
## 354    2   429 X2010    8.4
## 355    2   431 X2010    4.9
## 356    2   442 X2010    6.4
## 357    2   444 X2010    5.2
## 358    3   447 X2010    7.0
## 359    3   458 X2010    5.4
## 360    3   464 X2010    5.8
## 361    3   486 X2010    6.1
## 362    3   512 X2010    5.8
## 363    3   530 X2010    3.6
## 364    3   534 X2010    3.7
## 365    3   538 X2010    6.3
## 366    3   542 X2010    8.6
## 367    3   549 X2010    4.4
## 368    3   569 X2010    6.3
## 369    3   572 X2010   10.1
## 370    3   577 X2010    8.6
## 371    3   581 X2010    3.7
## 372    3   584 X2010    4.8
## 373    3   597 X2010    7.1
## 374    3   616 X2010    6.4
## 375    3   647 X2010    4.0
## 376    3   660 X2010    4.9
## 457    2   373 X2011    6.3
## 458    2   379 X2011    5.9
## 459    2   383 X2011    8.0
## 460    2   389 X2011    5.9
## 461    2   390 X2011    3.5
## 462    2   395 X2011    4.5
## 463    2   396 X2011    5.5
## 464    2   408 X2011    9.6
## 465    2   412 X2011    4.2
## 466    2   421 X2011    4.3
## 467    2   425 X2011    6.4
## 468    2   429 X2011    8.2
## 469    2   431 X2011    3.5
## 470    2   442 X2011    5.5
## 471    2   444 X2011    6.6
## 472    3   447 X2011    7.6
## 473    3   458 X2011    7.2
## 474    3   464 X2011    7.2
## 475    3   486 X2011    8.4
## 476    3   512 X2011    8.3
## 477    3   530 X2011    6.9
## 478    3   534 X2011    5.5
## 479    3   538 X2011    7.3
## 480    3   542 X2011    6.4
## 481    3   549 X2011    7.4
## 482    3   569 X2011    6.4
## 483    3   572 X2011    9.8
## 484    3   577 X2011    6.1
## 485    3   581 X2011    5.6
## 486    3   584 X2011    6.0
## 487    3   597 X2011    8.0
## 488    3   616 X2011    9.5
## 489    3   647 X2011    7.2
## 490    3   660 X2011    6.8

Note that here, we use %in% as a logical operator because we are looking to match a list of exact (character) values. If you want to keep observations within a range of numeric values, you either need two logical statements in your filter() function, e.g. length > 4 & length <= 6.5 or you can use the convenient between() function, e.g. between(length, 4, 6.5).

# SELECT COLUMNS

# Let's ditch the zone column just as an example

elong_no.zone <- dplyr::select(elongation_long, indiv, year, length)   # or alternatively
elong_no.zone <- dplyr::select(elongation_long, -zone) # the minus sign removes the column

# For comparison, the base R equivalent would be (not assigned to an object here):
elongation_long[ , -1]  # removes first column
##     indiv  year length
## 1     373 X2007    5.1
## 2     379 X2007    8.1
## 3     383 X2007    9.3
## 4     389 X2007   15.0
## 5     390 X2007    3.5
## 6     395 X2007    6.1
## 7     396 X2007    7.2
## 8     408 X2007    6.1
## 9     412 X2007    4.6
## 10    421 X2007    7.2
## 11    425 X2007    6.4
## 12    429 X2007    8.9
## 13    431 X2007    3.5
## 14    442 X2007    5.3
## 15    444 X2007    8.7
## 16    447 X2007    8.3
## 17    458 X2007    5.4
## 18    464 X2007    9.2
## 19    486 X2007    9.1
## 20    512 X2007    9.0
## 21    530 X2007    9.2
## 22    534 X2007    7.4
## 23    538 X2007    8.9
## 24    542 X2007    5.3
## 25    549 X2007    4.5
## 26    569 X2007    4.2
## 27    572 X2007    7.1
## 28    577 X2007    6.8
## 29    581 X2007    6.0
## 30    584 X2007    5.6
## 31    597 X2007    8.9
## 32    616 X2007    9.7
## 33    647 X2007    8.1
## 34    660 X2007    6.7
## 35    591 X2007    6.5
## 36    595 X2007    9.8
## 37    603 X2007    2.5
## 38    612 X2007    5.4
## 39    618 X2007    6.1
## 40    619 X2007    8.4
## 41    623 X2007    5.4
## 42    632 X2007    7.3
## 43    641 X2007    7.1
## 44    645 X2007    8.0
## 45    646 X2007   11.5
## 46    648 X2007   10.7
## 47    654 X2007    9.5
## 48    657 X2007    6.6
## 49    661 X2007    8.2
## 50    663 X2007    5.7
## 51    677 X2007    6.0
## 52    678 X2007    3.6
## 53    682 X2007    5.0
## 54      1 X2007    6.5
## 55     37 X2007    7.3
## 56     38 X2007    5.4
## 57     41 X2007    2.0
## 58     49 X2007    5.8
## 59     64 X2007    7.5
## 60     71 X2007    4.3
## 61     72 X2007    7.0
## 62     85 X2007    5.0
## 63     88 X2007    6.6
## 64    101 X2007    3.4
## 65    114 X2007    5.4
## 66    120 X2007    6.3
## 67    126 X2007    7.7
## 68    145 X2007    4.4
## 69    150 X2007    6.0
## 70    155 X2007    4.9
## 71    166 X2007    7.4
## 72    177 X2007    7.3
## 73    206 X2007    5.0
## 74    129 X2007    8.1
## 75    257 X2007    9.9
## 76    276 X2007    4.1
## 77    303 X2007    7.1
## 78    322 X2007    6.6
## 79    349 X2007    9.6
## 80    350 X2007    4.6
## 81    355 X2007    6.7
## 82    370 X2007    6.9
## 83    454 X2007    8.4
## 84    725 X2007    4.7
## 85    766 X2007    6.3
## 86    812 X2007    9.9
## 87    817 X2007    7.4
## 88    844 X2007    4.8
## 89    863 X2007    3.5
## 90    868 X2007    6.4
## 91    896 X2007    7.2
## 92    899 X2007    2.9
## 93    901 X2007    4.3
## 94    917 X2007    6.4
## 95    924 X2007    8.1
## 96    963 X2007    6.8
## 97    970 X2007    6.2
## 98    977 X2007    5.5
## 99    979 X2007    6.6
## 100  1000 X2007    4.3
## 101  1069 X2007    6.2
## 102  1073 X2007    7.3
## 103  1087 X2007    5.9
## 104  1109 X2007    4.5
## 105  1140 X2007    4.7
## 106  1183 X2007    7.2
## 107  1188 X2007    9.4
## 108  1247 X2007    5.5
## 109  1286 X2007    7.1
## 110  1340 X2007    7.1
## 111  1358 X2007    9.0
## 112  1392 X2007    3.9
## 113  1477 X2007    5.3
## 114  1481 X2007    5.4
## 115   373 X2008    5.1
## 116   379 X2008   13.3
## 117   383 X2008    8.5
## 118   389 X2008   10.3
## 119   390 X2008    6.2
## 120   395 X2008    5.6
## 121   396 X2008    5.9
## 122   408 X2008    8.1
## 123   412 X2008    6.7
## 124   421 X2008    5.8
## 125   425 X2008    8.1
## 126   429 X2008   11.2
## 127   431 X2008    5.9
## 128   442 X2008    9.9
## 129   444 X2008    5.6
## 130   447 X2008    6.5
## 131   458 X2008    8.6
## 132   464 X2008    9.7
## 133   486 X2008    6.7
## 134   512 X2008   10.8
## 135   530 X2008   11.4
## 136   534 X2008    6.8
## 137   538 X2008   12.5
## 138   542 X2008    6.0
## 139   549 X2008    5.6
## 140   569 X2008    6.3
## 141   572 X2008   10.1
## 142   577 X2008   12.1
## 143   581 X2008    6.9
## 144   584 X2008    6.6
## 145   597 X2008    7.9
## 146   616 X2008   10.8
## 147   647 X2008    6.9
## 148   660 X2008    5.4
## 149   591 X2008   10.9
## 150   595 X2008    9.2
## 151   603 X2008    3.8
## 152   612 X2008    6.5
## 153   618 X2008    7.5
## 154   619 X2008   10.5
## 155   623 X2008    7.5
## 156   632 X2008    7.3
## 157   641 X2008   13.2
## 158   645 X2008   11.5
## 159   646 X2008    7.0
## 160   648 X2008    9.2
## 161   654 X2008   10.4
## 162   657 X2008   14.0
## 163   661 X2008    8.9
## 164   663 X2008   10.6
## 165   677 X2008    8.6
## 166   678 X2008    3.7
## 167   682 X2008    6.8
## 168     1 X2008    7.7
## 169    37 X2008    7.3
## 170    38 X2008    5.8
## 171    41 X2008    4.7
## 172    49 X2008    4.4
## 173    64 X2008    7.4
## 174    71 X2008    4.7
## 175    72 X2008    9.5
## 176    85 X2008    5.6
## 177    88 X2008    7.5
## 178   101 X2008    5.6
## 179   114 X2008    5.4
## 180   120 X2008    6.3
## 181   126 X2008    6.5
## 182   145 X2008    4.7
## 183   150 X2008    3.1
## 184   155 X2008    5.1
## 185   166 X2008    5.9
## 186   177 X2008    5.0
## 187   206 X2008    4.4
## 188   129 X2008    6.7
## 189   257 X2008    8.4
## 190   276 X2008    6.2
## 191   303 X2008    6.9
## 192   322 X2008    6.9
## 193   349 X2008    9.8
## 194   350 X2008   10.3
## 195   355 X2008    5.5
## 196   370 X2008    7.6
## 197   454 X2008    9.7
## 198   725 X2008    3.7
## 199   766 X2008    6.7
## 200   812 X2008    8.7
## 201   817 X2008    8.6
## 202   844 X2008    4.7
## 203   863 X2008    3.3
## 204   868 X2008    6.9
## 205   896 X2008    7.9
## 206   899 X2008    6.0
## 207   901 X2008    4.5
## 208   917 X2008    8.9
## 209   924 X2008    7.8
## 210   963 X2008    7.2
## 211   970 X2008    5.2
## 212   977 X2008    5.3
## 213   979 X2008    6.3
## 214  1000 X2008    5.5
## 215  1069 X2008    3.9
## 216  1073 X2008    6.3
## 217  1087 X2008    6.7
## 218  1109 X2008    4.3
## 219  1140 X2008    3.9
## 220  1183 X2008    9.3
## 221  1188 X2008    8.9
## 222  1247 X2008    5.0
## 223  1286 X2008    9.5
## 224  1340 X2008    5.7
## 225  1358 X2008   10.0
## 226  1392 X2008    4.7
## 227  1477 X2008    5.9
## 228  1481 X2008    6.7
## 229   373 X2009    4.8
## 230   379 X2009    8.6
## 231   383 X2009   11.7
## 232   389 X2009    6.8
## 233   390 X2009    4.7
## 234   395 X2009    4.4
## 235   396 X2009    5.7
## 236   408 X2009    7.6
## 237   412 X2009    4.5
## 238   421 X2009    5.8
## 239   425 X2009    7.8
## 240   429 X2009    8.6
## 241   431 X2009    5.4
## 242   442 X2009    9.2
## 243   444 X2009    5.9
## 244   447 X2009    8.1
## 245   458 X2009    6.1
## 246   464 X2009    7.8
## 247   486 X2009    6.8
## 248   512 X2009    7.1
## 249   530 X2009    4.5
## 250   534 X2009    7.9
## 251   538 X2009   10.5
## 252   542 X2009    5.8
## 253   549 X2009    5.8
## 254   569 X2009    6.2
## 255   572 X2009   10.0
## 256   577 X2009   11.1
## 257   581 X2009    6.3
## 258   584 X2009    5.9
## 259   597 X2009    6.4
## 260   616 X2009    8.8
## 261   647 X2009    6.9
## 262   660 X2009    6.0
## 263   591 X2009    7.8
## 264   595 X2009    8.0
## 265   603 X2009    3.2
## 266   612 X2009    8.0
## 267   618 X2009    6.1
## 268   619 X2009    6.9
## 269   623 X2009    4.4
## 270   632 X2009    6.9
## 271   641 X2009    9.5
## 272   645 X2009    7.2
## 273   646 X2009    4.1
## 274   648 X2009    6.9
## 275   654 X2009    8.9
## 276   657 X2009   10.1
## 277   661 X2009    6.1
## 278   663 X2009    9.5
## 279   677 X2009    6.1
## 280   678 X2009    8.2
## 281   682 X2009    8.7
## 282     1 X2009    5.6
## 283    37 X2009    7.7
## 284    38 X2009    4.9
## 285    41 X2009    4.2
## 286    49 X2009    7.9
## 287    64 X2009    6.6
## 288    71 X2009    3.9
## 289    72 X2009    5.9
## 290    85 X2009    5.4
## 291    88 X2009    5.3
## 292   101 X2009    3.5
## 293   114 X2009    4.3
## 294   120 X2009    6.0
## 295   126 X2009    7.4
## 296   145 X2009    4.4
## 297   150 X2009    5.7
## 298   155 X2009    4.8
## 299   166 X2009    8.1
## 300   177 X2009    5.2
## 301   206 X2009    4.0
## 302   129 X2009    6.3
## 303   257 X2009    8.6
## 304   276 X2009    5.5
## 305   303 X2009    7.0
## 306   322 X2009    6.5
## 307   349 X2009    8.9
## 308   350 X2009    6.5
## 309   355 X2009    7.0
## 310   370 X2009    5.1
## 311   454 X2009    7.2
## 312   725 X2009    3.4
## 313   766 X2009    5.3
## 314   812 X2009    8.0
## 315   817 X2009    8.0
## 316   844 X2009    3.4
## 317   863 X2009    4.1
## 318   868 X2009    5.5
## 319   896 X2009    7.5
## 320   899 X2009    3.0
## 321   901 X2009    4.1
## 322   917 X2009    7.0
## 323   924 X2009    7.1
## 324   963 X2009    3.7
## 325   970 X2009    5.9
## 326   977 X2009    6.4
## 327   979 X2009    6.3
## 328  1000 X2009    3.6
## 329  1069 X2009    4.1
## 330  1073 X2009    5.3
## 331  1087 X2009    7.4
## 332  1109 X2009    3.4
## 333  1140 X2009    5.5
## 334  1183 X2009    6.7
## 335  1188 X2009    7.0
## 336  1247 X2009    4.2
## 337  1286 X2009    7.7
## 338  1340 X2009    7.2
## 339  1358 X2009    8.6
## 340  1392 X2009    2.9
## 341  1477 X2009    6.2
## 342  1481 X2009    6.0
## 343   373 X2010    8.7
## 344   379 X2010    4.9
## 345   383 X2010    7.9
## 346   389 X2010    6.9
## 347   390 X2010    3.8
## 348   395 X2010    4.5
## 349   396 X2010    5.4
## 350   408 X2010    6.2
## 351   412 X2010    2.8
## 352   421 X2010    5.6
## 353   425 X2010    6.2
## 354   429 X2010    8.4
## 355   431 X2010    4.9
## 356   442 X2010    6.4
## 357   444 X2010    5.2
## 358   447 X2010    7.0
## 359   458 X2010    5.4
## 360   464 X2010    5.8
## 361   486 X2010    6.1
## 362   512 X2010    5.8
## 363   530 X2010    3.6
## 364   534 X2010    3.7
## 365   538 X2010    6.3
## 366   542 X2010    8.6
## 367   549 X2010    4.4
## 368   569 X2010    6.3
## 369   572 X2010   10.1
## 370   577 X2010    8.6
## 371   581 X2010    3.7
## 372   584 X2010    4.8
## 373   597 X2010    7.1
## 374   616 X2010    6.4
## 375   647 X2010    4.0
## 376   660 X2010    4.9
## 377   591 X2010    6.1
## 378   595 X2010    6.7
## 379   603 X2010    2.3
## 380   612 X2010    5.7
## 381   618 X2010    4.2
## 382   619 X2010    5.2
## 383   623 X2010    3.4
## 384   632 X2010    6.3
## 385   641 X2010    6.0
## 386   645 X2010    8.5
## 387   646 X2010    5.2
## 388   648 X2010    5.5
## 389   654 X2010    8.6
## 390   657 X2010    6.6
## 391   661 X2010    5.7
## 392   663 X2010    6.9
## 393   677 X2010    7.0
## 394   678 X2010    4.4
## 395   682 X2010    5.1
## 396     1 X2010    6.8
## 397    37 X2010    5.6
## 398    38 X2010    3.5
## 399    41 X2010    3.3
## 400    49 X2010    6.7
## 401    64 X2010    5.4
## 402    71 X2010    3.7
## 403    72 X2010    6.6
## 404    85 X2010    8.6
## 405    88 X2010    5.2
## 406   101 X2010    2.8
## 407   114 X2010    4.6
## 408   120 X2010    5.2
## 409   126 X2010    5.7
## 410   145 X2010    3.3
## 411   150 X2010    3.9
## 412   155 X2010    7.3
## 413   166 X2010    6.5
## 414   177 X2010    4.0
## 415   206 X2010    3.2
## 416   129 X2010    6.0
## 417   257 X2010    6.8
## 418   276 X2010    4.8
## 419   303 X2010    6.4
## 420   322 X2010    4.9
## 421   349 X2010    8.5
## 422   350 X2010    4.0
## 423   355 X2010    4.8
## 424   370 X2010    5.4
## 425   454 X2010    7.6
## 426   725 X2010    1.9
## 427   766 X2010    4.9
## 428   812 X2010    8.5
## 429   817 X2010    6.6
## 430   844 X2010    3.2
## 431   863 X2010    3.3
## 432   868 X2010    4.5
## 433   896 X2010    4.9
## 434   899 X2010    2.8
## 435   901 X2010    4.2
## 436   917 X2010    6.3
## 437   924 X2010    6.8
## 438   963 X2010    3.5
## 439   970 X2010    6.2
## 440   977 X2010    5.7
## 441   979 X2010    7.7
## 442  1000 X2010    3.1
## 443  1069 X2010    3.8
## 444  1073 X2010    6.7
## 445  1087 X2010    5.1
## 446  1109 X2010    4.3
## 447  1140 X2010    5.2
## 448  1183 X2010    7.8
## 449  1188 X2010    6.9
## 450  1247 X2010    3.9
## 451  1286 X2010    5.6
## 452  1340 X2010    6.9
## 453  1358 X2010    8.5
## 454  1392 X2010    3.9
## 455  1477 X2010    4.2
## 456  1481 X2010    6.0
## 457   373 X2011    6.3
## 458   379 X2011    5.9
## 459   383 X2011    8.0
## 460   389 X2011    5.9
## 461   390 X2011    3.5
## 462   395 X2011    4.5
## 463   396 X2011    5.5
## 464   408 X2011    9.6
## 465   412 X2011    4.2
## 466   421 X2011    4.3
## 467   425 X2011    6.4
## 468   429 X2011    8.2
## 469   431 X2011    3.5
## 470   442 X2011    5.5
## 471   444 X2011    6.6
## 472   447 X2011    7.6
## 473   458 X2011    7.2
## 474   464 X2011    7.2
## 475   486 X2011    8.4
## 476   512 X2011    8.3
## 477   530 X2011    6.9
## 478   534 X2011    5.5
## 479   538 X2011    7.3
## 480   542 X2011    6.4
## 481   549 X2011    7.4
## 482   569 X2011    6.4
## 483   572 X2011    9.8
## 484   577 X2011    6.1
## 485   581 X2011    5.6
## 486   584 X2011    6.0
## 487   597 X2011    8.0
## 488   616 X2011    9.5
## 489   647 X2011    7.2
## 490   660 X2011    6.8
## 491   591 X2011   11.0
## 492   595 X2011   10.2
## 493   603 X2011    3.2
## 494   612 X2011    4.2
## 495   618 X2011    5.4
## 496   619 X2011   11.1
## 497   623 X2011    5.2
## 498   632 X2011    6.9
## 499   641 X2011    7.6
## 500   645 X2011    9.7
## 501   646 X2011    7.8
## 502   648 X2011    8.8
## 503   654 X2011   11.1
## 504   657 X2011    9.7
## 505   661 X2011   11.2
## 506   663 X2011    8.3
## 507   677 X2011    7.2
## 508   678 X2011    4.6
## 509   682 X2011    5.6
## 510     1 X2011    6.5
## 511    37 X2011    7.2
## 512    38 X2011    3.4
## 513    41 X2011    6.3
## 514    49 X2011    8.2
## 515    64 X2011    6.8
## 516    71 X2011    3.9
## 517    72 X2011    4.9
## 518    85 X2011    6.5
## 519    88 X2011    5.8
## 520   101 X2011    7.5
## 521   114 X2011    4.6
## 522   120 X2011    6.2
## 523   126 X2011    7.2
## 524   145 X2011    3.1
## 525   150 X2011    5.8
## 526   155 X2011    6.1
## 527   166 X2011    6.0
## 528   177 X2011    4.3
## 529   206 X2011    4.5
## 530   129 X2011    7.0
## 531   257 X2011    7.7
## 532   276 X2011    5.8
## 533   303 X2011    5.7
## 534   322 X2011    5.1
## 535   349 X2011    8.7
## 536   350 X2011    5.6
## 537   355 X2011    6.0
## 538   370 X2011    6.2
## 539   454 X2011    7.1
## 540   725 X2011    3.2
## 541   766 X2011    5.2
## 542   812 X2011    7.0
## 543   817 X2011    6.9
## 544   844 X2011    4.5
## 545   863 X2011    6.1
## 546   868 X2011    6.1
## 547   896 X2011    7.5
## 548   899 X2011    6.4
## 549   901 X2011    5.0
## 550   917 X2011    6.5
## 551   924 X2011    7.0
## 552   963 X2011    4.0
## 553   970 X2011    7.7
## 554   977 X2011    7.7
## 555   979 X2011    6.6
## 556  1000 X2011    6.7
## 557  1069 X2011    5.3
## 558  1073 X2011    6.0
## 559  1087 X2011    5.3
## 560  1109 X2011    2.9
## 561  1140 X2011    5.7
## 562  1183 X2011    8.3
## 563  1188 X2011    9.5
## 564  1247 X2011    6.0
## 565  1286 X2011    8.0
## 566  1340 X2011    7.3
## 567  1358 X2011    8.3
## 568  1392 X2011    3.8
## 569  1477 X2011    5.5
## 570  1481 X2011    7.2
## 571   373 X2012    3.2
## 572   379 X2012    6.3
## 573   383 X2012    6.3
## 574   389 X2012    7.6
## 575   390 X2012    3.0
## 576   395 X2012    7.6
## 577   396 X2012    4.7
## 578   408 X2012   10.1
## 579   412 X2012    5.2
## 580   421 X2012    3.4
## 581   425 X2012    7.9
## 582   429 X2012    6.6
## 583   431 X2012    4.9
## 584   442 X2012    5.0
## 585   444 X2012    7.4
## 586   447 X2012    8.3
## 587   458 X2012    9.4
## 588   464 X2012    6.2
## 589   486 X2012    7.9
## 590   512 X2012    6.7
## 591   530 X2012    5.7
## 592   534 X2012    6.7
## 593   538 X2012    7.4
## 594   542 X2012    7.9
## 595   549 X2012    4.9
## 596   569 X2012    6.4
## 597   572 X2012   13.1
## 598   577 X2012    8.2
## 599   581 X2012    5.3
## 600   584 X2012    6.1
## 601   597 X2012    9.4
## 602   616 X2012    8.0
## 603   647 X2012    7.3
## 604   660 X2012    6.4
## 605   591 X2012    5.0
## 606   595 X2012   10.5
## 607   603 X2012    5.4
## 608   612 X2012    6.6
## 609   618 X2012    5.5
## 610   619 X2012    7.4
## 611   623 X2012    6.8
## 612   632 X2012    6.6
## 613   641 X2012    6.7
## 614   645 X2012    9.9
## 615   646 X2012    6.8
## 616   648 X2012    9.2
## 617   654 X2012   10.2
## 618   657 X2012    6.2
## 619   661 X2012    8.7
## 620   663 X2012    7.8
## 621   677 X2012    7.8
## 622   678 X2012    7.9
## 623   682 X2012    6.0
## 624     1 X2012    9.5
## 625    37 X2012    7.7
## 626    38 X2012    6.0
## 627    41 X2012    7.7
## 628    49 X2012    6.1
## 629    64 X2012    7.8
## 630    71 X2012    5.3
## 631    72 X2012    4.3
## 632    85 X2012    6.8
## 633    88 X2012    5.7
## 634   101 X2012    8.0
## 635   114 X2012    5.7
## 636   120 X2012    5.0
## 637   126 X2012    7.1
## 638   145 X2012    4.5
## 639   150 X2012    7.0
## 640   155 X2012    6.3
## 641   166 X2012    8.0
## 642   177 X2012    3.0
## 643   206 X2012    3.8
## 644   129 X2012    6.5
## 645   257 X2012    7.0
## 646   276 X2012    5.7
## 647   303 X2012    6.7
## 648   322 X2012    6.5
## 649   349 X2012    6.1
## 650   350 X2012    3.1
## 651   355 X2012    6.2
## 652   370 X2012    6.8
## 653   454 X2012    7.2
## 654   725 X2012    3.7
## 655   766 X2012    7.3
## 656   812 X2012    7.4
## 657   817 X2012    7.7
## 658   844 X2012    3.6
## 659   863 X2012    4.9
## 660   868 X2012    6.2
## 661   896 X2012    7.1
## 662   899 X2012    3.1
## 663   901 X2012    4.9
## 664   917 X2012    5.3
## 665   924 X2012    6.6
## 666   963 X2012    4.5
## 667   970 X2012    6.3
## 668   977 X2012    7.9
## 669   979 X2012    7.1
## 670  1000 X2012    5.8
## 671  1069 X2012    7.2
## 672  1073 X2012    8.7
## 673  1087 X2012    6.2
## 674  1109 X2012    2.4
## 675  1140 X2012    3.9
## 676  1183 X2012    7.7
## 677  1188 X2012    7.8
## 678  1247 X2012    4.8
## 679  1286 X2012    8.2
## 680  1340 X2012    8.4
## 681  1358 X2012    8.3
## 682  1392 X2012    4.8
## 683  1477 X2012    6.1
## 684  1481 X2012    7.0
# A nice hack! select() lets you rename and reorder columns on the fly
elong_no.zone <- dplyr::select(elongation_long, Year = year, Shrub.ID = indiv, Growth = length)

# Neat, uh?

3c. mutate() your dataset by creating new columns

# CREATE A NEW COLUMN 

elong_total <- mutate(elongation, total.growth = X2007 + X2008 + X2009 + X2010 + X2011 + X2012)
head(elong_total)
##   Zone Indiv X2007 X2008 X2009 X2010 X2011 X2012 total.growth
## 1    2   373   5.1   5.1   4.8   8.7   6.3   3.2         33.2
## 2    2   379   8.1  13.3   8.6   4.9   5.9   6.3         47.1
## 3    2   383   9.3   8.5  11.7   7.9   8.0   6.3         51.7
## 4    2   389  15.0  10.3   6.8   6.9   5.9   7.6         52.5
## 5    2   390   3.5   6.2   4.7   3.8   3.5   3.0         24.7
## 6    2   395   6.1   5.6   4.4   4.5   4.5   7.6         32.7

Now, let’s see how we could accomplish the same thing on our long-format data elongation_long by using two functions that pair extremely well together: group_by()and summarise().

3d. group_by() certain factors to perform operations on chunks of data

# GROUP DATA

elong_grouped <- group_by(elongation_long, indiv)   # grouping our dataset by individual
head(elong_grouped)
## # A tibble: 6 x 4
## # Groups:   indiv [6]
##    zone indiv year  length
##   <int> <int> <chr>  <dbl>
## 1     2   373 X2007    5.1
## 2     2   379 X2007    8.1
## 3     2   383 X2007    9.3
## 4     2   389 X2007   15  
## 5     2   390 X2007    3.5
## 6     2   395 X2007    6.1
head(elongation_long)
##   zone indiv  year length
## 1    2   373 X2007    5.1
## 2    2   379 X2007    8.1
## 3    2   383 X2007    9.3
## 4    2   389 X2007   15.0
## 5    2   390 X2007    3.5
## 6    2   395 X2007    6.1

3e. summarise() data with a range of summary statistics

This function will always aggregate your original data frame, i.e. the output data frame will be shorter than the input. Here, let’s contrast summing growth increments over the study period on the original dataset vs our new grouped dataset.

# SUMMARISING OUR DATA

summary1 <- summarise(elongation_long, total.growth = sum(length))
summary2 <- summarise(elong_grouped, total.growth = sum(length))
## `summarise()` ungrouping output (override with `.groups` argument)
summary1
##   total.growth
## 1       4448.7
summary2
## # A tibble: 114 x 2
##    indiv total.growth
##    <int>        <dbl>
##  1     1         42.6
##  2    37         42.8
##  3    38         29  
##  4    41         28.2
##  5    49         39.1
##  6    64         41.5
##  7    71         25.8
##  8    72         38.2
##  9    85         37.9
## 10    88         36.1
## # … with 104 more rows

The first summary corresponds to the sum of all growth increments in the dataset (all individuals and years). The second one gives us a breakdown of total growth per individual, our grouping variable. Amazing! We can compute all sorts of summary statistics, too, like the mean or standard deviation of growth across years:

summary3 <- summarise(elong_grouped, total.growth = sum(length),
                                     mean.growth = mean(length),
                                     sd.growth = sd(length))
## `summarise()` ungrouping output (override with `.groups` argument)
head(summary3)
## # A tibble: 6 x 4
##   indiv total.growth mean.growth sd.growth
##   <int>        <dbl>       <dbl>     <dbl>
## 1     1         42.6        7.1      1.35 
## 2    37         42.8        7.13     0.781
## 3    38         29          4.83     1.14 
## 4    41         28.2        4.7      2.05 
## 5    49         39.1        6.52     1.41 
## 6    64         41.5        6.92     0.868

Always create a new object for summarised data, so that your full dataset doesn’t go away! You can always merge back some information at a later stage,

6. …_join() datasets based on shared attributes

We will import this data from the file EmpetrumTreatments.csv, which contains the details of which individuals received which treatments, and join it with our main dataset elongation_long. We can do this because both datasets have a column representing the ID of each plant: this is what we will merge by.

# Load the treatments associated with each individual

treatments <- read.csv("EmpetrumTreatments.csv", header = TRUE, sep = ";")
head(treatments)
##   Zone Indiv Treatment
## 1    2   373         C
## 2    2   379         W
## 3    2   383         F
## 4    2   389        WF
## 5    2   390         C
## 6    2   395         W
# Join the two data frames by ID code. The column names are spelled differently, so we need to tell the function which columns represent a match. We have two columns that contain the same information in both datasets: zone and individual ID.

experiment <- left_join(elongation_long, treatments, by = c("indiv" = "Indiv", "zone" = "Zone"))

# We see that the new object has the same length as our first data frame, which is what we want. And the treatments corresponding to each plant have been added!