We're going to produce a “Haywood Plot” showing cumulative mean Forest Fire Danger Index, for data from Marysville in the southern Australian Alps, from 1900 to 2011.
First we'll load in the data, break it down into years, and calculate a cumulative mean value throughout the year for each year.
options(rpubs.upload.method = "internal")
station_data = read.csv("G:\\SILO_Data\\complete_stations\\88044.csv", stringsAsFactors = F)
years = unique(station_data$year)
out_frame = data.frame(year = 0, date = as.Date("1900-01-01"), ca = 0)
for (the_year in years) {
this_ss = subset(station_data, year == the_year)
this_ss$c_date = as.Date(this_ss$c_date)
ovec = c()
for (idx in seq_along(this_ss$c_date)) {
tavg = mean(this_ss$IDMA[1:idx])
ovec = c(ovec, tavg)
}
thisdf = data.frame(year = the_year, date = as.Date(this_ss$c_date), ca = ovec)
out_frame = rbind(out_frame, thisdf)
}
out_frame = subset(out_frame, year != 0)
out_frame$jd = format(out_frame$date, format = "%j")
Now we'll aggregate to identify the five years with the highest mean value, so we can highlight them.
agg_frame = aggregate(out_frame$ca, by = list(out_frame$year), FUN = mean)
sort.order = order(-agg_frame$x)[1:5]
big_years = agg_frame$Group.1[sort.order]
title = paste("Marysville Top years", big_years[1], big_years[2], big_years[3],
big_years[4], big_years[5], sep = ", ")
And now for the plot.
for (idx in seq_along(years)) {
the_year = years[idx]
this_ss = subset(out_frame, year == the_year)
if (idx == 1) {
plot(ca ~ jd, data = this_ss, type = "l", ylim = c(0, max(out_frame$ca +
1)), main = title, xlab = "Julian Day", ylab = "FFDI")
} else {
lines(ca ~ jd, data = this_ss)
}
if (idx %in% sort.order) {
lines(ca ~ jd, data = this_ss, col = "red", lwd = 3)
}
}