Ecosystem Preserve Project

Read in the Given Datasets for 2010-2016 Leaf Drop Data

library(tidyverse)
## Read the seven yearly leaf drop sheets (2010-2016).
## `skip` skips the first 3 lines of the 2010-2012 files and the first 4 lines
## of the 2013-2016 files before reading the header row.
## Blank cells and 'x'/'X' placeholders are read as NA; the 2014 sheet also
## treats 'thanksgiving'/'Thanksgiving' cells as NA.
## `na.strings` is written out in full instead of relying on partial matching
## of the abbreviated argument name `na`.
data2010 <- read.csv('2010 Leaf Drop Data Sheets.csv', na.strings = c('', 'x', 'X'), skip = 3)
data2011 <- read.csv('2011 Leaf Drop Data Sheets.csv', na.strings = c('', 'x', 'X'), skip = 3)
data2012 <- read.csv('2012 Leaf Drop Data Sheets.csv', na.strings = c('', 'x', 'X'), skip = 3)
data2013 <- read.csv('2013 Leaf Drop Data Sheets.csv', na.strings = c('', 'x', 'X'), skip = 4)
data2014 <- read.csv('2014 Leaf Drop Data Sheets.csv', na.strings = c('', 'x', 'X', 'thanksgiving', 'Thanksgiving'), skip = 4)
data2015 <- read.csv('2015 Leaf Drop Data Sheets.csv', na.strings = c('', 'x', 'X'), skip = 4)
data2016 <- read.csv('2016 Leaf Drop Data Sheets.csv', na.strings = c('', 'x', 'X'), skip = 4)

Rename Columns

## Give the four measurement columns the same names in every yearly sheet.
## In the 2010 sheet they are columns 1-4; in the 2011-2016 sheets they are
## columns 2-5.
names(data2010)[1:4] <- c('Date', 'Basket', 'InitialWeight', 'After2Weeks')

names(data2011)[2:5] <- c('Date', 'Basket', 'InitialWeight', 'After2Weeks')

names(data2012)[2:5] <- c('Date', 'Basket', 'InitialWeight', 'After2Weeks')

names(data2013)[2:5] <- c('Date', 'Basket', 'InitialWeight', 'After2Weeks')

names(data2014)[2:5] <- c('Date', 'Basket', 'InitialWeight', 'After2Weeks')

names(data2015)[2:5] <- c('Date', 'Basket', 'InitialWeight', 'After2Weeks')

names(data2016)[2:5] <- c('Date', 'Basket', 'InitialWeight', 'After2Weeks')

## Reduce every yearly sheet to the four renamed measurement columns
data2010 <- select(data2010, Date, Basket, InitialWeight, After2Weeks)
data2011 <- select(data2011, Date, Basket, InitialWeight, After2Weeks)
data2012 <- select(data2012, Date, Basket, InitialWeight, After2Weeks)
data2013 <- select(data2013, Date, Basket, InitialWeight, After2Weeks)
data2014 <- select(data2014, Date, Basket, InitialWeight, After2Weeks)
data2015 <- select(data2015, Date, Basket, InitialWeight, After2Weeks)
data2016 <- select(data2016, Date, Basket, InitialWeight, After2Weeks)

Pasting Together

## Keep only the leading rows of each sheet (row counts are hard-coded per year)
data2010 <- data2010[seq_len(75), ]
data2011 <- data2011[seq_len(114), ]
data2012 <- data2012[seq_len(105), ]
data2013 <- data2013[seq_len(133), ]
data2014 <- data2014[seq_len(129), ]
data2015 <- data2015[seq_len(115), ]
data2016 <- data2016[seq_len(124), ]

## Make the 2014 Basket column an integer before binding
## NOTE(review): presumably it was read as a type that does not combine with
## the other years' Basket columns -- confirm
data2014 <- mutate(data2014, Basket = as.integer(Basket))

## Bind the seven yearly datasets into one; `newdata` holds 2010-2015
newdata <- bind_rows(data2010, data2011, data2012, data2013, data2014, data2015)

finaldata <- bind_rows(newdata, data2016)

Replace dates labeled as ‘/16’ to ‘/2016’

## Expand informal two-digit years: 'month/day/16' becomes 'month/day/2016'.
## The pattern is anchored to the whole value, so a date such as '10/16/2014'
## (where '/16' is the day) is left untouched. Every two-digit 2016 date is
## handled in one pass instead of one hard-coded replacement per collection
## day. Surrounding whitespace on a matching value is dropped.
finaldata <- finaldata %>%
  mutate(Date = stringr::str_replace(Date, '^\\s*(\\d{1,2}/\\d{1,2})/16\\s*$', '\\1/2016')) %>%
  ## Drop every row that still has a missing value in any column
  na.omit()

Separate Date Variable into Month, Day, and Year Variables

## Split 'month/day/year' into Month, Day and Year, then store Day as a number
finaldata <- finaldata %>%
  separate(Date, into = c('Month', 'Day', 'Year'), sep = '/') %>%
  mutate(Day = as.numeric(Day))

Add New Date Variable and 1-365 Julian Date Variable

## Rebuild a Date column from the split parts (pasted as 'month day year')
finaldata$Date <- as.Date(
  paste(finaldata$Month, finaldata$Day, finaldata$Year),
  format = "%m %d %Y"
)

## Day of the year (the %j field of the date) as a number
finaldata$julian_date <- as.numeric(format(finaldata$Date, "%j"))

Add Week Variable

## Week of the year for each record, stored as a number
## NOTE(review): week() is not defined in the visible code -- presumably
## lubridate::week(); confirm the package is attached
finaldata <- mutate(finaldata, Week = as.numeric(week(Date)))

Add in Weather Data

## Download the weather records and discard the wdir, wpgt and tsun columns
weather_data <- read.csv('https://sldr.netlify.app/data/gr-weather-data.csv') %>%
  select(-wdir, -wpgt, -tsun)

Wrangle the Weather Data

## Keep only the columns of interest, renaming `date` to `Date` on the way
weather_data <- weather_data %>%
  select(Date = date, prcp, snow, wspd, tmax, tmin, tavg)

Merging the Weather and Leaf Drop Data

## Build a Date column in the weather data the same way as in the leaf drop
## data: split 'year-month-day', make Day numeric, then re-assemble and parse
weather_data <- weather_data %>%
  separate(Date, into = c('Year', 'Month', 'Day'), sep = '-') %>%
  mutate(Day = as.numeric(Day))

weather_data$Date <- as.Date(
  paste(weather_data$Month, weather_data$Day, weather_data$Year),
  format = "%m %d %Y"
)

## Merge by Date; with merge()'s defaults only dates present in both tables
## are kept
merged_data <- merge(finaldata, weather_data, by = 'Date')

Organize Final Dataset

## Drop the weather-side copies of Month/Day/Year created by the merge and
## give the remaining columns more readable names
merged_data <- merged_data %>%
  select(-Month.y, -Day.y, -Year.y) %>%
  rename(
    Month = Month.x,
    Day = Day.x,
    Year = Year.x,
    Julian = julian_date,
    Precipitation = prcp,
    Snow = snow,
    WindSpeed = wspd,
    MaxTemperature = tmax,
    MinTemperature = tmin,
    AverageTemperature = tavg
  )

## Print the column names, types and first values
glimpse(merged_data)

## Rows: 724
## Columns: 15
## $ Date               <date> 2010-10-04, 2010-10-04, 2010-10-05, 2010-10-05, 20…
## $ Month              <chr> "10", "10", "10", "10", "10", "10", "10", "10", "10…
## $ Day                <dbl> 4, 4, 5, 5, 7, 7, 8, 8, 9, 9, 11, 11, 12, 12, 14, 1…
## $ Year               <chr> "2010", "2010", "2010", "2010", "2010", "2010", "20…
## $ Basket             <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7,…
## $ InitialWeight      <dbl> 7.04, 2.62, 2.40, 5.16, 4.49, 10.17, 2.70, 6.46, 4.…
## $ After2Weeks        <dbl> 6.18, 2.16, 2.25, 5.10, 4.22, 9.41, 2.53, 5.70, 4.6…
## $ Julian             <dbl> 277, 277, 278, 278, 280, 280, 281, 281, 282, 282, 2…
## $ Week               <dbl> 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41,…
## $ Precipitation      <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
## $ Snow               <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ WindSpeed          <dbl> 7.6, 7.6, 9.4, 9.4, 12.2, 12.2, 13.7, 13.7, 10.4, 1…
## $ MaxTemperature     <dbl> 16.1, 16.1, 20.0, 20.0, 21.7, 21.7, 25.6, 25.6, 24.…
## $ MinTemperature     <dbl> 1.1, 1.1, 0.6, 0.6, 7.2, 7.2, 6.1, 6.1, 10.0, 10.0,…
## $ AverageTemperature <dbl> 8.2, 8.2, 10.6, 10.6, 13.7, 13.7, 15.0, 15.0, 17.5,…

North vs. South of Stream Variable

## Label each basket by its side of the stream: baskets up to 6 and basket 11
## are North ('N'); baskets 7-10 and 12 are South ('S'). Any other value
## (including a missing Basket) gets NA.
merged_data <- merged_data %>%
  mutate(NorthSouth = case_when(
    Basket <= 6 | Basket == 11 ~ 'N',
    Basket %in% c(7, 8, 9, 10, 12) ~ 'S'
  ))

Conversion and Units

## Convert basket leaf mass to leaf mass per square meter
## NOTE(review): 0.165 is presumably the basket area in square meters -- confirm
merged_data$After2Weeks <- merged_data$After2Weeks / 0.165

Units for Variables: After2Weeks (Leaf Drop Mass): Grams per Square Meter (\(g/m^2\)); Precipitation: Millimeters (\(mm\)); Snow: Millimeters (\(mm\)); WindSpeed: Meters per Second (\(m/s\)); Temperature: Degrees Celsius (\(°C\)).

Write the File

## Save the merged dataset. Row names are written too (write.csv default),
## which is why the file has a leading `X` column when it is read back in.
write.csv(x = merged_data, file = 'leafdropdata.csv')

The “merged_data” after running everything is the Leaf Drop Data and Weather Data finalized. It is written into a new csv file called ‘leafdropdata’.

Questions We will Attempt to Answer

How does the dry weight of leaves falling compare week to week?
How do the seasons vary year to year? As in, does the leaf falling “season” begin earlier or later depending on the year?
How does the weather affect the total dry weight of the leaves?

Read in Completely Tidied Dataset

## Load the tidied dataset written earlier and show its first six rows
leafdropdata <- read.csv(file = 'leafdropdata.csv')

head(leafdropdata, n = 6)

##   X       Date Month Day Year Basket InitialWeight After2Weeks Julian Week
## 1 1 2010-10-04    10   4 2010      1          7.04    37.45455    277   40
## 2 2 2010-10-04    10   4 2010      2          2.62    13.09091    277   40
## 3 3 2010-10-05    10   5 2010      3          2.40    13.63636    278   40
## 4 4 2010-10-05    10   5 2010      4          5.16    30.90909    278   40
## 5 5 2010-10-07    10   7 2010      5          4.49    25.57576    280   40
## 6 6 2010-10-07    10   7 2010      6         10.17    57.03030    280   40
##   Precipitation Snow WindSpeed MaxTemperature MinTemperature AverageTemperature
## 1             0    0       7.6           16.1            1.1                8.2
## 2             0    0       7.6           16.1            1.1                8.2
## 3             0    0       9.4           20.0            0.6               10.6
## 4             0    0       9.4           20.0            0.6               10.6
## 5             0    0      12.2           21.7            7.2               13.7
## 6             0    0      12.2           21.7            7.2               13.7
##   NorthSouth
## 1          N
## 2          N
## 3          N
## 4          N
## 5          N
## 6          N

Create Average Weekly Leaf Drop Weight by Year Graph

## Mean leaf drop mass for every Year/Week combination
avg_weekly_data <- aggregate(
  leafdropdata$After2Weeks,
  by = list(Year = leafdropdata$Year, Week = leafdropdata$Week),
  FUN = mean
)

## aggregate() names the summarised column `x`; give it a descriptive name
names(avg_weekly_data)[names(avg_weekly_data) == 'x'] <- 'Mean_Week'

## Treat Year as categorical
avg_weekly_data$Year <- as.character(avg_weekly_data$Year)

## Show first six rows of averaged data
head(avg_weekly_data)

##   Year Week Mean_Week
## 1 2014   37 46.767677
## 2 2011   38  5.262626
## 3 2013   38 14.691919
## 4 2014   38 16.616162
## 5 2015   38  4.328283
## 6 2011   39  9.712121

## Line graph of weekly mean leaf drop, one panel and one colour per year
gf_line(Mean_Week ~ Week, color = ~Year, data = avg_weekly_data) %>%
  gf_facet_wrap(~Year) %>%
  gf_labs(
    title = 'Figure 1',
    x = 'Week of Year',
    y = 'Average Leaf Drop Mass (g/m²)',
    color = ''
  )

This graph shows the Average Weekly Leaf Drop Weight over time. The graph is faceted by year in order to compare and contrast the Leaf Drop Weight over time. Although the years differ slightly from one another, they all show a common trend of peaking at around Week 42-45 of the year, which is around the end of October. Interestingly, the 2011 Leaf Drop Weight has two distinct peaks, one at the beginning and one at the end of October. There could be several reasons for this, so we will have to take a look at the temperature in each year to see if 2011 had distinctly different weather that might cause the bimodal distribution. 2013 also has a different shape, as the rise in leaf drop began earlier than in the other years and the peak was much lower.

Create Average Daily Leaf Drop Weight by Year Graph

## Mean leaf drop mass for every Year/Julian-day combination
avg_daily_data <- aggregate(
  leafdropdata$After2Weeks,
  by = list(Year = leafdropdata$Year, Day = leafdropdata$Julian),
  FUN = mean
)

## aggregate() names the summarised column `x`; give it a descriptive name
names(avg_daily_data)[names(avg_daily_data) == 'x'] <- 'Mean_Day'

## Treat Year as categorical
avg_daily_data$Year <- as.character(avg_daily_data$Year)

## Show first six rows of averaged data
head(avg_daily_data)

##   Year Day   Mean_Day
## 1 2014 258 77.2929293
## 2 2014 259 16.2424242
## 3 2011 261  0.6666667
## 4 2013 261  7.0505051
## 5 2014 261  3.4343434
## 6 2015 261  1.0101010

## Line graph of daily mean leaf drop, one panel and one colour per year
gf_line(Mean_Day ~ Day, color = ~Year, data = avg_daily_data) %>%
  gf_facet_wrap(~Year) %>%
  gf_labs(
    title = 'Figure 2',
    x = 'Day of Year',
    y = 'Average Leaf Drop Mass (g/m²)'
  )

This graph of Average Daily Leaf Drop Weight over time gives us more insight into what is happening throughout each year. Again in 2011, there are two distinct peaks, whereas in the other years the peaks are not as distinct. Now we see a similar trend with a more specific value, as the Leaf Drop Weights tend to peak around day 300, which is October 27th. 2016 has a slightly later peak, and day 300 is actually in between 2011’s peaks.

Basket Graph

## Scatter plot of leaf drop mass against precipitation, one panel per basket
gf_point(After2Weeks ~ Precipitation | Basket, data = leafdropdata) %>%
  gf_labs(
    title = 'Figure 3',
    x = 'Precipitation (mm)',
    y = 'Leaf Drop Mass (g/m²)'
  )

This graph shows a scatter plot of the Leaf Drop Weights by the Precipitation faceted by Basket number. As the Baskets were placed randomly around the Calvin Ecosystem Preserve, we are trying to see if any specific Basket happens to be placed in a spot where more leaves tend to fall. There does not appear to be any big trends, but some Baskets may collect slightly more leaves than others. Precipitation does not seem to have a huge effect on the leaf drop mass.

Basket Graph 2

## Box plots of leaf drop mass per basket (coloured by basket), stacked in a
## single column with one panel per year; x-axis tick labels are hidden
gf_boxplot(After2Weeks ~ Basket, color = ~factor(Basket), data = leafdropdata) %>%
  gf_facet_wrap(~Year, ncol = 1) %>%
  gf_labs(
    title = 'Figure 4',
    x = '',
    y = 'Leaf Drop Mass (g/m²)',
    color = 'Basket'
  ) %>%
  gf_theme(axis.text.x = element_blank())

Temperature Over Time

## Mean leaf drop mass and mean maximum temperature for every
## Year/Julian-day combination, each in its own table
avg_daily_data <- aggregate(
  leafdropdata$After2Weeks,
  by = list(Year = leafdropdata$Year, Day = leafdropdata$Julian),
  FUN = mean
)
avg_daily_data2 <- aggregate(
  leafdropdata$MaxTemperature,
  by = list(Year = leafdropdata$Year, Day = leafdropdata$Julian),
  FUN = mean
)

## Join the two summaries; both value columns are called `x`, so merge()
## suffixes them as x.x (weight) and x.y (temperature)
avg_daily_data <- merge(x = avg_daily_data, y = avg_daily_data2, by = c('Year', 'Day'))

## Rename variables
avg_daily_data <- avg_daily_data %>%
  rename(Mean_Daily_Weight = x.x, Mean_Daily_Temp = x.y)

## Treat Year as categorical
avg_daily_data$Year <- as.character(avg_daily_data$Year)

## Line graph of mean daily maximum temperature, coloured by mean daily leaf
## drop mass, one panel per year
gf_line(Mean_Daily_Temp ~ Day, color = ~Mean_Daily_Weight, data = avg_daily_data) %>%
  gf_facet_wrap(~Year) %>%
  gf_theme(scale_color_viridis_c()) %>%
  gf_labs(
    title = 'Figure 5',
    x = 'Day of Year',
    y = 'Temperature (°C)',
    color = 'Mean Daily Leaf Drop Weight'
  )

This weather graph shows the Maximum Temperature each day throughout each year in Grand Rapids, colored by the mean Leaf Drop Weight on that day. We see what is expected: the temperature goes up and down somewhat randomly, but with an overall downward trend over time. In 2011, we see a drop in temperature that comes slightly earlier in the year than usual, between days 280 and 290. This could explain the earlier rise in Leaf Drop Weight that we saw in 2011. After that peak, the temperature rose back up as the leaves fell less, and then the temperature went down again, hence the second peak. However, every year has inconsistent day-to-day weather, so it is difficult to attribute Leaf Drop Weight to Temperature alone; there are other predictors.

Overall, the Leaf Drop Weight goes up as temperature goes down, until it reaches the point where there are no more leaves to fall.

Leaf Drop Weight by Average Temperature

## Scatter plot of leaf drop mass against average temperature, coloured by
## day of the year
gf_point(After2Weeks ~ AverageTemperature, color = ~Julian, data = leafdropdata) %>%
  gf_labs(
    title = 'Figure 6',
    x = 'Average Temperature (ºC)',
    y = 'Leaf Drop Mass (g/m²)',
    color = 'Days'
  ) %>%
  gf_theme(scale_color_viridis_c())

This scatter plot shows the Leaf Drop Weight by the Average Temperature on the day that weight was recorded. The day is represented by color. We see a somewhat symmetric, unimodal, bell-shaped pattern. It shows us that there tends to be higher Leaf Drop Weight when the temperature is in the middle of its range, and lower Leaf Drop Weight if it is very hot or very cold. The color shows that the majority of high Leaf Drop Weights happen between around day 290 and day 320. The darker and lighter points, which represent earlier and later dates in the year, appear to be at the ends of the distribution, with low Leaf Drop Weights and either a high or low temperature.

Leaf Drop Weight by Average Temperature

## Leaf drop mass against average temperature, one panel per year
## (graph requested by the project partner)
gf_point(After2Weeks ~ AverageTemperature, data = leafdropdata) %>%
  gf_facet_wrap(~Year) %>%
  gf_labs(
    title = 'Figure 7',
    x = 'Average Temperature (°C)',
    y = 'Leaf Drop Mass (g/m²)',
    color = 'Year'
  )

These scatter plots show that although Temperature varies year to year, there does not seem to be any trend and the temperature depending on the year is seemingly random.

Leaf Drop Weights by Wind Speed by Month

## Leaf drop mass against wind speed, one panel per month
gf_point(After2Weeks ~ WindSpeed, data = leafdropdata) %>%
  gf_facet_wrap(~Month) %>%
  gf_labs(
    title = 'Figure 8',
    x = 'Wind Speed (m/s)',
    y = 'Leaf Drop Mass (g/m²)'
  )

This is the Leaf Drop as a function of Wind Speed faceted by month. October seems to have a negatively skewed normal distribution while November definitely seems to be uniformly distributed. This just goes to show that high winds before the Fall season do not necessarily make leaves fall.

North vs. South of Stream

## Box plots of leaf drop mass for baskets North ('N') and South ('S') of the
## stream. The x-axis shows the NorthSouth category, so it is labelled as the
## side of the stream rather than as a day of the year.
gf_boxplot(After2Weeks ~ NorthSouth, data = leafdropdata) %>%
  gf_labs(title = 'Figure 9', x = 'Side of Stream (N = North, S = South)', y = 'Leaf Drop Mass (g/m²)')

Baskets 1-6 and 11 lie North of the stream, and Baskets 7-10 and 12 are South of the stream. There seems to be no difference between the Leaf Drop Mass on the North and South sides of the stream.

North vs. South of Stream Basket Graph

## Create North vs. South of Stream scatter plots as Requested by Partner
## Leaf drop mass over the day of the year, one panel per side of the stream
## (scatter plots requested by the project partner)
gf_point(After2Weeks ~ Julian | NorthSouth, data = leafdropdata) %>%
  gf_labs(
    title = 'Figure 10',
    x = "Day of Year",
    y = 'Leaf Drop Mass (g/m²)'
  )

Both Leaf Drop Mass distributions over time look relatively similar for the baskets on the North and South side of the stream. They both have normal and unimodal distributions.

Table of Leaf Drop Mass by Basket Across Years

## Mean leaf drop mass for every Year/Basket combination
avg_basket_data <- aggregate(
  leafdropdata$After2Weeks,
  by = list(Year = leafdropdata$Year, Basket = leafdropdata$Basket),
  FUN = mean
)

## aggregate() names the summarised column `x`; give it a descriptive name
names(avg_basket_data)[names(avg_basket_data) == 'x'] <- 'Mean_Basket'

## Table requested by the project partner: one row per basket, one column per year
pivot_wider(avg_basket_data, names_from = Year, values_from = Mean_Basket)

## # A tibble: 12 × 8
##    Basket `2010` `2011` `2012` `2013` `2014` `2015` `2016`
##     <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1      1   89.4   38.4   55.4   54.6  149.    94.9   43.0
##  2      2   67.8   53.9   77.0   56.1   50.5   51.0   50.5
##  3      3   78.6   68.1   80.5   61.0   50.1   70.6   55.3
##  4      4   80.0   46.5   77.2   95.7   56.2   47.9   58.4
##  5      5  121.    51.0   74.1   89.0   47.5   46.3   51.5
##  6      6   77.8   43.3   78.0   93.3   49.2   55.3   45.0
##  7      7   74.0   72.1   91.5   85.7   49.5   77.8   65.3
##  8      8   70.8   71.9   56.7   88.8   45.9   49.0   53.4
##  9      9  111.    57.8   65.0   83.8   47.2   58.6   67.4
## 10     10   50.1   55.3   69.3   41.5   64.9   58.3   46.4
## 11     11   NA     67.7   79.8  102.    65.1   44.3   46.2
## 12     12   NA     79.1  103.    61.4   74.1   83.7   61.9

Questions We Answered

How does the dry weight of leaves falling compare week to week?

The Leaf Drop Mass increases over the Fall season, hits a peak, then trends downward. The upward trend is likely due to the days getting shorter and the weather getting colder, whereas the downward trend is likely due to a decrease in the number of leaves left to fall.

How do the seasons vary year to year? As in, does the leaf falling “season” begin earlier or later depending on the year?

The Year to Year Leaf Drop masses are relatively similar as the Fall season continues. However, there are a few abnormalities: 2011 has two peaks and 2013 has an early increase in leaf drop leading to less of a peak.

How does the weather affect the total dry weight of the leaves?

It seems that weather is less important than time. Obviously, as it gets colder, the Leaf Drop Mass increases, and then there are fewer leaves left to fall, so it decreases.

Read in Completely Tidied Dataset

## Reload the tidied dataset so the modelling section starts from the saved file
leafdropdata <- read.csv(file = 'leafdropdata.csv')

Choose Response and Predictor Variables

Our variable of interest is the Leaf Drop Mass (\(g/m^2\)). Since the mass of each basket was measured once immediately after gathering the basket and again two weeks later, we will use the masses measured after two weeks, because by then the leaves have had time to dry and the water weight on them has evaporated. The measurement after two weeks is the most accurate.

The variables that could be predictors are year, day, precipitation (\(mm\)), wind speed (\(m/s\)), and the average temperature by day (\(°C\)). We have seven years of data from 2010-2016, and the weather or seasons could vary from year to year. As we know, winter approaches as the days go on in this data set, so the day clearly should be a predictor of the dropping leaves. Similarly, the average temperature each day generally gets colder as these days go on, so temperature should be a predictor. The precipitation should be a predictor as rain could cause the leaves to fall. We also chose wind speed as a predictor because more wind or faster wind speeds can cause leaves to fall. We decided to omit snow as a predictor variable because there are a limited number of rows in the data set that have a snow height higher than zero. This could be for many reasons: the people gathering data may have chosen to gather on days when it did not snow; snow may have left the baskets with no leaves or ruined the leaves, which led to non-applicable rows; or it may just so happen that it rarely snowed on the days that baskets were collected. We also decided not to include the month or week variable, as the day-to-day differences should cover time as a predictor for each individual year.

Fit the Model

## library() stops with an error when mgcv is not installed; require() would
## only return FALSE and let gamm() fail later with a less helpful message
library(mgcv)

leafdropdata <- leafdropdata %>%
  ## Week is only used below as a grouping label for a random intercept
  mutate(Week = as.character(Week)) %>%
  ## The Gamma family needs a strictly positive response, so exact zeros are
  ## replaced with a small positive value
  mutate(After2Weeks = ifelse(After2Weeks == 0, 0.0001, After2Weeks))

## Gamma (log link) additive mixed model for leaf drop mass:
##  - cyclic cubic smooth ('cc') of day of year
##  - thin plate smooths ('tp') of wind speed and average temperature
##  - linear terms for Year and Precipitation
##  - random intercepts for Date and Week
fit_mod <- gamm(After2Weeks ~ s(Julian, k = 5, bs = 'cc') + Year +
                  s(WindSpeed, k = 5, bs = 'tp') +
                  Precipitation +
                  s(AverageTemperature, k = 5, bs = 'tp'), random = list(Date = ~1, Week = ~1),
                data = leafdropdata,
                method = 'ML',
                family = Gamma(link = 'log'))

## 
##  Maximum number of PQL iterations:  20

## Summarise the gam component of the fitted gamm object
summary(fit_mod[["gam"]])

## 
## Family: Gamma 
## Link function: log 
## 
## Formula:
## After2Weeks ~ s(Julian, k = 5, bs = "cc") + Year + s(WindSpeed, 
##     k = 5, bs = "tp") + Precipitation + s(AverageTemperature, 
##     k = 5, bs = "tp")
## 
## Parametric coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)   62.113610  58.448484   1.063    0.288
## Year          -0.029048   0.029035  -1.000    0.317
## Precipitation -0.006232   0.007791  -0.800    0.424
## 
## Approximate significance of smooth terms:
##                         edf Ref.df      F p-value    
## s(Julian)             2.834      3 79.679  <2e-16 ***
## s(WindSpeed)          1.000      1  5.153  0.0235 *  
## s(AverageTemperature) 1.000      1  0.087  0.7675    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.356   
##   Scale est. = 0.48853   n = 721

Check independence

## Residuals of the lme component of the fitted model, as a plain numeric vector
fit_resids <- as.numeric(residuals(fit_mod$lme))

## Autocorrelation function of those residuals
acf(fit_resids)

Our model passes the condition of independence as the ACF remains mostly within the confidence bounds and there are no trends.

Mean variance

## Residuals against fitted values for the lme component of the model
## NOTE(review): `by = 'Year'` does not look like an argument that the
## residuals method uses -- confirm whether it has any effect
gf_point(
  as.numeric(resid(fit_mod$lme, by = 'Year')) ~ as.numeric(fitted(fit_mod$lme))
) %>%
  gf_labs(x = "Fitted values", y = "Residuals")

Our model passes the condition of Mean Variance as there are no specific trends in the Residuals vs. Fitted scatter plot.

Prediction Plot

## library() stops with an error when ggeffects is not installed; require()
## would only return FALSE and let ggpredict() fail later
library(ggeffects)

## Prediction Plot for Leaf Drop Mass by Day for each Year
pred_year <- ggpredict(fit_mod, terms = c('Julian', 'Year'))
plot(pred_year) +
  labs(x = "Days",
       y = "Leaf Drop Mass (g/m^2)",
       color = "Year")

## Prediction Plot for Leaf Drop Mass by Day and Wind Speed
pred_ws <- ggpredict(fit_mod, terms = c("Julian", "WindSpeed"))
plot(pred_ws) +
  labs(x = "Days",
       y = "Leaf Drop Mass (g/m^2)",
       color = "Wind Speed (m/s)")

## Prediction Plot for Leaf Drop Mass by Day and Average Temperature
pred_at <- ggpredict(fit_mod, terms = c("Julian", "AverageTemperature"))
plot(pred_at) +
  labs(x = "Days",
       y = "Leaf Drop Mass (g/m^2)",
       color = "AverageTemperature (C)")

The prediction plots are broken down by three variables: Year, WindSpeed, and AverageTemperature. The peak mass of leaves happens at around day 300, which is at the end of October. Year 2010 is predicted to have the most leaf drop mass. The most leaf drop mass is predicted at a wind speed of around 20 m/s. Leaves are predicted to drop most at a temperature of 16 °C. The prediction graphs show us that the leaves are predicted to drop in a roughly bell-shaped, unimodal pattern, which confirms our conclusions from the graphical exploration.

## ANOVA table for the lme component of the fitted gamm object
anova(fit_mod[["lme"]])

##   numDF denDF F-value p-value
## X     5   238 833.882  <.0001

Overall Conclusions

Our model, through prediction plots, seems to confirm what we interpreted from our graphics. Leaf Drop Mass follows a bell-shaped curve over time, and the other predictors do have an effect on how many leaves drop as well. The small p-value (<0.05) indicates that we have strong evidence for our alternative hypothesis that the predictors are associated with our response variable. Our research questions, developed in consultation with Dr. Van Dragt, were: How does the dry weight of leaves falling compare week to week? How do the seasons vary year to year? As in, does the leaf falling “season” begin earlier or later depending on the year? How does the weather affect the total dry weight of the leaves? Our group was able to provide several visualizations that compare the weight of leaf drop within each basket by day of year, week, and other factors, as requested by Dr. Van Dragt. Although we knew that leaves fall in the fall season (due to Michigan’s four distinct seasons), it was fascinating to manipulate the data to visualize and confirm that leaves do drop the most in late October through early November. Overall, we were extremely honored to work with Dr. Van Dragt, and this project was a meaningful and fulfilling experience for all of us.

Ecosystem Preserve Project

Eco Preserve Team: Trey, Hansol, Quan, and Hayworth

September 2021

Read in the Given Datasets for 2010-2016 Leaf Drop Data

Rename Columns

Pasting Together

Replace dates labeled as ‘/16’ to ‘/2016’

Separate Date Variable into Month, Day, and Year Variables

Add New Date Variable and 1-365 Julian Date Variable

Add Week Variable

Add in Weather Data

Wrangle the Weather Data

Merging the Weather and Leaf Drop Data

Organize Final Dataset

North vs. South of Stream Variable

Conversion and Units

Write the File

Questions We will Attempt to Answer

Read in Completely Tidied Dataset

Create Average Weekly Leaf Drop Weight by Year Graph

Create Average Daily Leaf Drop Weight by Year Graph

Basket Graph

Basket Graph 2

Temperature Over Time

Leaf Drop Weight by Average Temperature

Leaf Drop Weight by Average Temperature

Leaf Drop Weights by Wind Speed by Month

North vs. South of Stream

North vs. South of Stream Basket Graph

Table of Leaf Drop Mass by Basket Across Years

Questions We Answered

Read in Completely Tidied Dataset

Choose Response and Predictor Variables

Fit the Model

Check independence

Mean variance

Prediction Plot

Overall Conclusions