Merge exercise, and re-ordering factors

How to merge two data frames?
And, on a side note, how to re-order factor levels.

In this long-running experiment, the same sites and treatments were named differently over twenty years. Treatments and locations were coded as numbers, names, or abbreviated names. The same applies to the actual measured data: numbers, abbreviations, or names were used…

The information is now put into a few small data frames. These small data frames are then merged with the measured data step by step.

See ?merge for details

# merge example based on FESA data

# Relative file names used further down (e.g. "all_Abu_files_combined.csv")
# are resolved against this folder, so it is set before any data are read.
setwd("~/FESA/playground/vegetation_sheets")

# Lookup table for the experimental set-up: FESA site code -> site name.
# The order of `FESA_name` is re-used later as the level order for site factors.
FESA_codes <- c(1, 2, 3, 4, 5)
FESA_name  <- c("Blakeville", "Barkstead", "Burnt Bridge Ck", "Musk Ck", "Kangaroo Ck")

# The codes are identifiers rather than quantities, so they are stored as a factor.
FESA_area_names <- data.frame(
  FESA_codes = factor(FESA_codes),
  FESA_name  = FESA_name
)

# show the data frame
FESA_area_names

##   FESA_codes       FESA_name
## 1          1      Blakeville
## 2          2       Barkstead
## 3          3 Burnt Bridge Ck
## 4          4         Musk Ck
## 5          5     Kangaroo Ck


# Lookup table for the plot (treatment) codes: plot code -> treatment name.
# "Control" comes first; `Plot_name` is re-used later as the level order for treatments.
Plot_name  <- c("Control", "Spring3", "Spring10", "Autumn3", "Autumn10")
Plot_codes <- c(1, 2, 3, 4, 5)

Plot_names <- data.frame(Plot_codes, Plot_name)
# store the codes as a factor, not as numbers
Plot_names[["Plot_codes"]] <- factor(Plot_names[["Plot_codes"]])

# show the data frame
Plot_names

##   Plot_codes Plot_name
## 1          1   Control
## 2          2   Spring3
## 3          3  Spring10
## 4          4   Autumn3
## 5          5  Autumn10


# add fire-history data
# import fire history table from other folder
# (TRUE is spelled out: `T` is an ordinary variable that can be overwritten)
fire_history <- read.csv("~/FESA/Database/fire_history.csv", header = TRUE)

# don't need the year information, only interested in the burn date
fire_history$Year <- NULL

# The burn dates are parsed as day/month/year text into date-time values.
# NOTE(review): the time zone "EST" is kept from the original; confirm it is
# the zone intended for these sites.
fire_history$Burn_date <- as.POSIXct(as.character(fire_history$Burn_date),
                                     format = "%d/%m/%Y", tz = "EST")

# "Control" treatment should be first: re-order the factor levels following
# the definition given in "Plot_name"
fire_history$Treatment <- factor(fire_history$Treatment,
                                 levels = Plot_name)
head(fire_history)

##    FESA_name Treatment  Burn_date
## 1 Blakeville   Control 1935-01-01
## 2 Blakeville   Spring3 1935-01-01
## 3 Blakeville  Spring10 1935-01-01
## 4 Blakeville   Autumn3 1935-01-01
## 5 Blakeville  Autumn10 1935-01-01
## 6  Barkstead   Control 1931-01-01


# importance of ordering factors
# ggplot(), aes() and geom_point() come from ggplot2; load it explicitly so
# the plotting calls do not depend on what happens to be attached already.
library(ggplot2)

# At this point FESA_name has not been given an explicit level order yet.
ggplot(fire_history, aes(x = FESA_name, y = Burn_date)) + geom_point()

## Warning: Removed 491 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-1


# now re-ordering the factor
# Historically, the sites have a specific order. Unfortunately, this order is
# not alphabetic. Here we specify the order we want (as defined previously
# in the vector "FESA_name").
fire_history[["FESA_name"]] <- factor(fire_history[["FESA_name"]], levels = FESA_name)

# doing the graph again - watch closely
ggplot(data = fire_history, mapping = aes(x = FESA_name, y = Burn_date)) +
  geom_point()

## Warning: Removed 491 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-1


# The order of things is specified via "factor". All other functions will honour the ordering.

#############
# merge data
# import and prepare Abundance data
# (TRUE is spelled out: `T` is an ordinary variable that can be overwritten)
Abu <- read.csv("all_Abu_files_combined.csv", header = TRUE)

# get rid of the Assessor names for protection
Abu$Assessor <- NULL

# the site and plot codes are stored as factors, matching the lookup tables
Abu$FESA_code <- as.factor(Abu$FESA_code)
Abu$Area      <- as.factor(Abu$Area)

# blank out the text "na" in the abundance codes, then store them as a factor
Abu$Abundance_cover <- as.factor(gsub("na", "", Abu$Abundance_cover))

# Dates are parsed as day/month/year text. The former `origin` argument was
# dropped: it only matters for numeric input and had no effect here.
Abu$Date <- as.POSIXct(as.character(Abu$Date),
                       format = "%d/%m/%Y", tz = "EST")

# calendar year of each record; "year" comes from package lubridate and is
# namespace-qualified so the package does not have to be attached
Abu$Year <- lubridate::year(Abu$Date)

# display the Abundance data frame
head(Abu)

##   FESA_code Area       Date  Species Abundance_cover Year
## 1         2    5 2012-03-06 acac.mel               s 2012
## 2         2    5 2012-03-06 acac.muc                 2012
## 3         2    5 2012-03-06 acro.pro               s 2012
## 4         2    5 2012-03-06 acro.ser               u 2012
## 5         2    5 2012-03-06 aspe.sco                 2012
## 6         2    5 2012-03-06 bill.sca               s 2012


# Lookup table for the abundance codes: abbreviation -> full name -> numeric score.
# `Abundance_Name` is listed from most to least abundant; this order is re-used
# later as the factor level order.
Abundance_Abb  <- c("p", "c", "cp", "s", "u")
Abundance_Name <- c("prolific", "common", "common in patches", "scattered", "uncommon")
Abundance_No   <- c(5, 4, 3, 2, 1)

Abundance_names <- data.frame(
  Abundance_Abb  = Abundance_Abb,
  Abundance_Name = Abundance_Name,
  Abundance_No   = Abundance_No
)

# Attach the full abundance name and score to every record. The key columns are
# named differently in the two tables, hence by.x / by.y. all.x = TRUE keeps
# records whose abundance code has no match in the lookup table.
Abu <- merge(
  x     = Abu,
  y     = Abundance_names,
  by.x  = "Abundance_cover",
  by.y  = "Abundance_Abb",
  all.x = TRUE
)

# the order of the names of the abundance categories is not alphabetic:
# re-ordering the levels to get the order we want
Abu[["Abundance_Name"]] <- factor(Abu[["Abundance_Name"]], levels = Abundance_Name)

# add the names of treatments to the data (plot code -> treatment name)
Abu <- merge(x = Abu, y = Plot_names, by.x = "Area", by.y = "Plot_codes")

# add the site names to the data (FESA code -> site name)
Abu <- merge(x = Abu, y = FESA_area_names, by.x = "FESA_code", by.y = "FESA_codes")

# give both name columns the level order defined in "Plot_name" and "FESA_name"
Abu[["Plot_name"]] <- factor(Abu[["Plot_name"]], levels = Plot_name)
Abu[["FESA_name"]] <- factor(Abu[["FESA_name"]], levels = FESA_name)

# merge by two elements at the same time: site name and treatment together
# identify a row of the fire history. all.y = TRUE keeps every abundance
# record, also those without a matching fire-history entry.
x <- merge(
  x     = fire_history,
  y     = Abu,
  by.x  = c("FESA_name", "Treatment"),
  by.y  = c("FESA_name", "Plot_name"),
  all.y = TRUE
)

# have a look at some data with an existing burn date
head(subset(x, !is.na(Burn_date)))

##      FESA_name Treatment  Burn_date FESA_code Area Abundance_cover
## 3217 Barkstead  Autumn10 1987-04-27         2    5               u
## 3218 Barkstead  Autumn10 1987-04-27         2    5               u
## 3219 Barkstead  Autumn10 1987-04-27         2    5               s
## 3220 Barkstead  Autumn10 1987-04-27         2    5               u
## 3221 Barkstead  Autumn10 1987-04-27         2    5                
## 3222 Barkstead  Autumn10 1987-04-27         2    5               c
##            Date  Species Year Abundance_Name Abundance_No
## 3217 2012-03-06 heli.sco 2012       uncommon            1
## 3218 1999-02-03 chil.gun 1999       uncommon            1
## 3219 1996-01-18 hypo.rad 1996      scattered            2
## 3220 1995-01-30 hype.gra 1995       uncommon            1
## 3221 1989-02-03 spic.hun 1989           <NA>           NA
## 3222 1996-01-18 loma.lon 1996         common            4