Weather Criteria over Time

In this analysis, we seek to plot changes in a fire-related meteorological index over time, for the archive of meterological data, to see if we can observer any changes. The methodology is based on Groisman et al. 2007 who used number of days with KBDI over a percentile threshold as a measurement of fire season seversity.

First we will set up the data sources for the analysis. We'll grab SOI as well.

library(ggplot2)  # For plotting final graph
library(zoo)  # Has a nice rolling average function
## Attaching package: 'zoo'
## The following object(s) are masked from 'package:base':
## 
## as.Date, as.Date.numeric

station_root = "G:\\SILO_Data\\complete_stations\\"  # Where the met station data is stored
station_list = read.csv("E:\\NASA_NERP\\HotspotFFDI\\data\\ready\\silo_stations.csv")  # Station geographic locations
soi = read.fwf("http://www.longpaddock.qld.gov.au/seasonalclimateoutlook/southernoscillationindex/soidatafiles/MonthlySOIPhase1887-1989Base.txt", 
    widths = c(5, 5, 5, 5), skip = 1, colClasses = "numeric", col.names = c("year", 
        "month", "soi", "phase"))
soi$suby = soi$year + (soi$month - 1)/12
soi_vec = soi$soi
names(soi_vec) = soi$suby
sm.soi = rollmean(soi_vec, 12 * 10)
so_frame = data.frame(year = names(sm.soi), soi = sm.soi)

We define the counting function - it simply counts the number of values in a vector above a given threshold.

count_KBDI = function(x, threshold) {
    sum(x >= threshold)
}

Now we are going to loop through every zone, within each zone pick a maximum of 20 stations randomly, and for each station, determine the 90th percentile of KBDI values for the entire record. On a yearly basis, we will then count how many days have KBDI values above that baseline threshold. Yearly counts will then be averaged across all stations for a regional zone.

out_data = data.frame(zone = 0, year = 0, days = 0)  # Storage frame
for (zone in 1:61) {
    stations_to_get = subset(station_list, station_list$GRIDCODE == zone)$no_
    if (length(stations_to_get) > 20) {
        # If more than 20 stations, pick 20 randomly to use
        stations_to_get = sample(stations_to_get, 20)
    }
    zone_table = data.frame(year = 0, days = 0)
    for (idx in seq_along(stations_to_get)) {
        station = stations_to_get[idx]
        station_data = read.csv(paste(station_root, station, ".csv", sep = ""), 
            stringsAsFactors = F)
        cutoff_KBDI = quantile(station_data$IDK, 0.9)
        day_summary = tapply(station_data$IDK, station_data$year, count_KBDI, 
            threshold = cutoff_KBDI)
        stat_frame = data.frame(year = rownames(day_summary), days = day_summary)
        zone_table = rbind(zone_table, stat_frame)
    }
    zone_table = tapply(zone_table$days, zone_table$year, mean)
    zone_table = data.frame(zone = rep(zone, length(zone_table)), year = names(zone_table), 
        days = zone_table)
    out_data = rbind(out_data, zone_table)
}
out_data = subset(out_data, year != 0)

Let's plot a single zone - 33, the Australian Alps. And we'll add a rolling mean.

zone_set = subset(out_data, zone == 33)
plot(zone_set$days ~ zone_set$year, type = "l", xlab = "Year", ylab = "Days KBDI > 90%", 
    main = "Alps")
lines(rollmean(zone_set$days, 15) ~ names(rollmean(zone_set$days, 15)), col = "darkgreen", 
    lwd = 2)

plot of chunk unnamed-chunk-4

In fact, let's calculate that rolling mean for all cases, and plot a few of them together, using ggplot2 to make them pretty.

roll_frame = data.frame(zone = 0, year = 0, days = 0)
for (zoner in 1:61) {
    zone_set = subset(out_data, zone == zoner)
    r_mean = rollmean(zone_set$days, 15)
    zone_table = data.frame(zone = rep(zoner, length(r_mean)), year = names(r_mean), 
        days = r_mean)
    roll_frame = rbind(roll_frame, zone_table)
}
roll_frame = subset(roll_frame, year != 0)
roll_frame$zone = as.factor(roll_frame$zone)
roll_frame$zone = as.factor(roll_frame$zone)
roll_frame = subset(roll_frame, zone == "33" | zone == "16" | zone == "21" | 
    zone == "18" | zone == "25")
p <- ggplot(roll_frame, aes(x = year, y = days, group = zone))
p + facet_grid(zone ~ .) + geom_line(aes(color = zone)) + xlim(1905, 2005) + 
    scale_x_discrete(breaks = seq(1905, 2005, 5), labels = seq(1905, 2005, 5))

plot of chunk unnamed-chunk-5

And now just for comparison, here's a plot of SOI phase over the same period.

so_frame$year = as.numeric(as.character(so_frame$year))
g <- ggplot(so_frame, aes(year, soi, group = NA))
g + geom_line() + scale_x_continuous(breaks = seq(1905, 2005, 5), labels = seq(1905, 
    2005, 5)) + xlim(1905, 2005)
## Warning: Removed 319 rows containing missing values (geom_path).

plot of chunk unnamed-chunk-6