Tallahassee Climate Extremes: Hot Days

Lede: The hots are getting hotter

Introduction

Here we look at the statistics of extremely hot daytime temperatures in Tallahassee, Florida. A record of daytime maximum temperatures from the airport reveals an increasing trend in the occurrence of 100+\(^\circ\)F days since the 1960s. The trend results from an increase in the occurrence of hot events coupled with an increase in the average length of an event. It is speculated that the trend is related to changes in the intensity and location of the subtropical high pressure system, perhaps exacerbated by changes in land use and global warming.

Data

Hourly data: http://weatherspark.com/history/31772/2013/Tallahassee-Florida-United-States

Daily data: http://www.ncdc.noaa.gov/cdo-web/datasets

Read TLH data from NCDC.

# lubridate supplies the year()/month() accessors used below.
library(lubridate)

# Download the Tallahassee daily summary; "-9999" marks a missing value.
df1 <- read.csv(
  file = "http://myweb.fsu.edu/jelsner/data/TLH_DailySummary.csv",
  na.strings = "-9999",
  header = TRUE
)

# Convert the raw TMAX/TMIN columns to whole degrees Fahrenheit.  The raw
# values are divided by 10 first (presumably tenths of a degree Celsius --
# confirm against the NCDC file documentation).
df1$TmaxF <- round(9/5 * df1$TMAX/10 + 32)
df1$TminF <- round(9/5 * df1$TMIN/10 + 32)

# Parse the yyyymmdd stamp and derive calendar fields from it.
df1$Date <- as.Date(as.character(df1$DATE), format = "%Y%m%d")
df1$Year <- year(df1$Date)
df1$Month <- month(df1$Date)

# Keep complete years only (drop everything from 2014 on); rows whose Year
# is NA are dropped as well.
df1 <- df1[which(df1$Year < 2014), ]

# Fill in the single day patched by hand; values from Weather Underground.
df1$TmaxF[df1$Date == "2005-07-08"] <- 96
df1$TminF[df1$Date == "2005-07-08"] <- 71

TLH.df <- df1

From the NWSO TLH: For temperature and precipitation records, there are two sets of data. The first two columns of data represent extremes recorded at the current observing site at the Tallahassee Regional Airport (previously the Tallahassee Municipal Airport). Prior to March 29, 1961, observations were taken at other locations in Tallahassee and records for these locations are listed in the last two columns. Observations were taken at the Dale Mabry Field from this date back to March 1940. Prior to that, observations were taken at various downtown locations beginning in April 1885.

July 8, 2005 had a missing high and low temperature. Values are obtained from Weather Underground.

Compare with Las Vegas, Nevada.

Read Las Vegas data from NCDC.

# library() stops with an error if lubridate is missing; require() would only
# return FALSE and let the failure surface later at year().
library(lubridate)

# Download the Las Vegas daily summary; "-9999" marks a missing value.
df1 = read.csv(file = "http://myweb.fsu.edu/jelsner/data/LV_DailySummary.csv",
               na.strings = "-9999", header = TRUE)
# Daily high in whole degrees Fahrenheit.  TMAX is divided by 10 first
# (presumably tenths of a degree Celsius -- confirm against the NCDC docs).
df1$TmaxF = round(9/5 * df1$TMAX/10 + 32)
# Parse the yyyymmdd stamp and derive the calendar fields.
df1$Date = as.Date(as.character(df1$DATE), format = "%Y%m%d")
df1$Year = year(df1$Date)
df1$Month = month(df1$Date)
# Drop everything from 2014 on, as is done for the Tallahassee record.
df1 = subset(df1, Year < 2014)
LVG.df = df1

Bar plot of the number of days at or above 100F by year.

suppressMessages(library(dplyr))
library(ggplot2)

TLH.df <- tbl_df(TLH.df)

# One row per year: number of days with a high of at least 100F, number with
# at least 99F, and the mean daily high.  Missing highs are ignored.
TLH.df2 <- summarise(
  group_by(TLH.df, Year),
  N100 = sum(TmaxF >= 100, na.rm = TRUE),
  N99 = sum(TmaxF >= 99, na.rm = TRUE),
  avgTmaxF = mean(TmaxF, na.rm = TRUE)
)

# Yearly mean of the daily highs, drawn as points joined by a line.
ggplot(data = TLH.df2, mapping = aes(x = Year, y = avgTmaxF)) +
  geom_point(size = 3) +
  geom_line() +
  labs(y = "Average Annual Temperature in Tallahassee, FL (F)") +
  theme_bw()

plot of chunk unnamed-chunk-3

# Yearly count of 100F+ days: bars shaded from orange to red by count, each
# annotated with its count as a text label.
ggplot(data = TLH.df2, mapping = aes(x = Year, y = N100, fill = N100)) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = N100), vjust = 1.5, size = 3) +
  scale_fill_continuous(low = "orange", high = "red") +
  scale_x_continuous(breaks = seq(1950, 2013, 10)) +
  labs(
    x = "",
    y = expression(paste("Number of days in Tallahassee, FL at or above 100", {}^o, " F"))
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(size = 11), legend.position = "none")

plot of chunk unnamed-chunk-3

Bar plot of the number of days at or above 100F by month.

# Count the 100F+ days in each calendar month (all years pooled), turn the
# month number into an ordered Jan..Dec factor, and draw the counts as bars.
TLH.df %>%
  group_by(Month) %>%
  summarise(Number = sum(TmaxF >= 100, na.rm = TRUE)) %>%
  mutate(MonthF = factor(month.abb[Month], levels = month.abb)) %>%
  ggplot(data = ., mapping = aes(x = MonthF, y = Number, fill = Number)) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(label = Number), vjust = 1.5, size = 3) +
  scale_fill_continuous(low = "orange", high = "red") +
  labs(
    x = "",
    y = expression(paste("Number of days in Tallahassee, FL at or above 100", {}^o, " F"))
  ) +
  theme_bw() +
  theme(legend.position = "none")

plot of chunk unnamed-chunk-4

Histogram of daily high temperature.

# Distribution of the daily highs in one-degree bins; bar colour tracks the
# bin count (green for low counts, blue for high counts).
ggplot(data = TLH.df, mapping = aes(x = TmaxF)) +
  geom_histogram(mapping = aes(fill = ..count..), binwidth = 1) +
  scale_fill_continuous(low = "green", high = "blue") +
  labs(
    x = expression(paste("Daily High Temperature in Tallahassee, FL (", {}^o, " F)")),
    y = "Number of Days"
  ) +
  theme_bw() +
  theme(legend.position = "none")

plot of chunk unnamed-chunk-5

Convective vs. non-convective atmosphere? Compare with Las Vegas, NV.

# Stack the two cities' daily highs into one long data frame, tagging each
# value with the city it came from.
L1 <- nrow(TLH.df)
L2 <- nrow(LVG.df)
TmaxF <- c(TLH.df$TmaxF, LVG.df$TmaxF)
Where <- rep(c("Tallahassee, FL", "Las Vegas, NV"), times = c(L1, L2))
combine.df <- data.frame(TmaxF, Where)

# Side-by-side histograms (one-degree bins), one panel per city.
ggplot(data = combine.df, mapping = aes(x = TmaxF)) +
  geom_histogram(mapping = aes(fill = ..count..), binwidth = 1) +
  scale_fill_continuous(low = "green", high = "blue") +
  facet_wrap(~ Where) +
  labs(
    x = expression(paste("Daily High Temperature (", {}^o, " F)")),
    y = "Number of Days"
  ) +
  theme_bw() +
  theme(legend.position = "none")

plot of chunk unnamed-chunk-6

Calendar heat map. Code from Nathan Yau flowingdata.com

cal <- function(dt) {
    # Locate a single date in a month-calendar grid.
    #
    # Args:
    #   dt: one date (a Date, or anything as.POSIXlt() can convert).
    # Returns:
    #   c(weekrow, daycol), both ZERO-based: weekrow is 0 for the calendar
    #   row that holds the 1st of the month, and daycol runs from
    #   0 (Sunday) to 6 (Saturday).
    #
    # Base R is used for the date parts so the result does not depend on
    # lubridate's week-start option and no local shadows year()/month()/day().
    # http://swingleydev.org/blog/tag/r/
    parts <- as.POSIXlt(dt)
    day_of_month <- parts$mday
    # Weekday of the 1st of dt's month, 1 = Sunday ... 7 = Saturday.
    wday_first <- (parts$wday - (day_of_month - 1)) %% 7 + 1
    # Shift so that day + offset counts grid cells from the Sunday that
    # starts the first row, plus one extra week (removed again below).
    offset <- 7 + (wday_first - 2)
    week_row <- ((day_of_month + offset) %/% 7) - 1
    day_col <- (day_of_month + offset) %% 7

    c(week_row, day_col)
}
weekrow <- function(dt) {
    # Calendar row of `dt`: the first element of the pair returned by cal().
    position <- cal(dt)
    position[1]
}
daycol <- function(dt) {
    # Calendar column of `dt`: the second element of the pair returned by
    # cal().
    position <- cal(dt)
    position[2]
}
vweekrow <- function(dts) {
    # Apply weekrow() to every date in `dts`.
    # vapply() always returns a numeric vector (numeric(0) for empty input),
    # whereas sapply() would return an empty list in that case.
    vapply(dts, weekrow, numeric(1))
}
vdaycol <- function(dts) {
    # Apply daycol() to every date in `dts`.
    # vapply() always returns a numeric vector (numeric(0) for empty input),
    # whereas sapply() would return an empty list in that case.
    vapply(dts, daycol, numeric(1))
}
# May-August days from 2006 onward, laid out for a calendar heat map.
df4 = subset(TLH.df, Year >= 2006 & Month >= 5 & Month <= 8)
df4$month = month(df4$Date, label = TRUE, abbr = FALSE)
# Rows are listed bottom-to-top (5 ... 0) so that, on the y axis, the first
# week of the month ends up at the top; labels are the 1-based week number.
df4$weekrow = factor(vweekrow(df4$Date),
   levels = c(5, 4, 3, 2, 1, 0),
   labels = c('6', '5', '4', '3', '2', '1'))
# Levels are pinned to 0:6 (Sunday ... Saturday) so the seven labels always
# line up, even if some weekday is absent from the subset; without explicit
# levels factor() would stop with a labels-length error in that case.
df4$daycol = factor(vdaycol(df4$Date),
   levels = 0:6,
   labels = c('u', 'm', 't', 'w', 'r', 'f', 's'))
#df4$TmaxF[df4$TmaxF >= 100] = NA
# The 100F+ days, kept separately so they can be labelled differently.
df5 = subset(df4, TmaxF >= 100)

Plot calendar

library(scales)
library(grid)
# Calendar heat map: one panel per Year x month, one tile per day, filled by
# the daily high and labelled with it.
ggplot(data = df4, aes(x = daycol, y = weekrow, fill = TmaxF)) +
    theme_bw() +
    theme(axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          strip.background = element_blank(),
          panel.border = element_rect(colour = "white"),
          # The legend is switched off, so no legend spacing is set: the old
          # `legend.margin = unit(0, "in")` had no visible effect here and a
          # bare unit is not accepted for that element by ggplot2 >= 2.2.
          # To show the legend instead:
          #   legend.position = "bottom",
          #   legend.key.width = unit(1, "in")
          legend.position = "none") +
    geom_tile(colour = "white") +
#    facet_grid(~ month) +
    facet_grid(Year ~ month) +
    scale_fill_continuous(name = "Temperature (°F)", low = "blue", high = "red") +
    # Every day's high in white (data, x and y are inherited from ggplot()) ...
    geom_text(aes(label = TmaxF),
              vjust = .5, size = 4, color = "white") +
    # ... then the 100F+ days drawn again in black on top.
    geom_text(data = df5, aes(label = TmaxF),
              vjust = .5, size = 4, color = "black")

plot of chunk unnamed-chunk-9

#    ggtitle("Daily High Temperature")
# For every calendar day (Month, Day): the highest and lowest daily high on
# record and the year in which each occurred.  which.max()/which.min() pick
# the first matching row of each group when there are ties.
dailyRecs <- TLH.df %>%
  mutate(Day = day(Date)) %>%
  group_by(Month, Day) %>%
  summarise(
    dHmax = max(TmaxF, na.rm = TRUE),
    dHmin = min(TmaxF, na.rm = TRUE),
    dHmaxY = Year[which.max(TmaxF)],
    dHminY = Year[which.min(TmaxF)]
  )

# Number of calendar days whose record-high maximum is held by each year.
countHmax <- summarise(group_by(dailyRecs, dHmaxY), count = n())

# Number of calendar days whose record-low maximum is held by each year.
countHmin <- summarise(group_by(dailyRecs, dHminY), count = n())

Plot on 2014 calendar.

# Every day of 2014, used only to place the records on a calendar grid.
tt = seq(as.Date("2014-01-01"), as.Date("2014-12-31"), by = "day")
# Remove leap day as 2014 is not a leap year.  The row is selected by its
# Month/Day values rather than by position (row 60), so this does not depend
# on row order and does not delete another day if the chunk is run twice.
isLeapDay = dailyRecs$Month %in% 2 & dailyRecs$Day %in% 29
dailyRecs = dailyRecs[!isLeapDay, ]
# Fail early with a clear message if the records do not cover the calendar.
stopifnot(nrow(dailyRecs) == length(tt))
dailyRecs$Date = tt
dailyRecs$month = month(dailyRecs$Date, label = TRUE, abbr = FALSE)
# Rows are listed bottom-to-top (5 ... 0) so the first week plots at the top.
dailyRecs$weekrow = factor(vweekrow(dailyRecs$Date),
   levels = c(5, 4, 3, 2, 1, 0),
   labels = c('6', '5', '4', '3', '2', '1'))
# Levels pinned to 0:6 (Sunday ... Saturday) so the labels always line up.
dailyRecs$daycol = factor(vdaycol(dailyRecs$Date),
   levels = 0:6,
   labels = c('u', 'm', 't', 'w', 'r', 'f', 's'))
library(grid)
# Calendar heat map of the record daily highs: one panel per month, one tile
# per day, filled by and labelled with the record value.
ggplot(data = dailyRecs, aes(x = daycol, y = weekrow, fill = dHmax)) +
    theme_bw() +
    theme(axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.ticks.x = element_blank(),
          axis.ticks.y = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          strip.background = element_blank(),
          panel.border = element_rect(colour = "white"),
 #         legend.position = "none",
          legend.position = "bottom",
          legend.key.width = unit(1, "in"),
          # NOTE(review): ggplot2 >= 2.2 expects margin() for legend.margin;
          # confirm the installed version accepts a bare unit here.
          legend.margin = unit(0, "in")) +
    geom_tile(colour = "white") +
    facet_wrap(~ month, nrow=4) +
    scale_fill_continuous(name = "Temperature (°F)", 
                          low="blue", high="red") +
    # data, x and y are inherited from ggplot(); only the label is added.
    geom_text(aes(label = dHmax),
              vjust = .5, size = 5, color="white") +
    ggtitle("Tallahassee, Florida\nRecord Daily High Temperatures [1948-2013]\nData Source: NCDC")

plot of chunk unnamed-chunk-12

It is interesting that the most frequent daily high temperature is such a high value. Atmospheric conditions that conspire to produce extremely warm days in Tallahassee are broad scale. This means that the high pressure ridge producing subsidence and keeping the air relatively dry spreads across several states, often encompassing the entire southeast. The occurrence of a 100+\(^\circ\)F day is often followed by a better than average chance of another hot day as positive feedback occurs.

Consecutive hot days tend to produce the hottest days. As a consequence of this clustering, it is necessary (statistically) to consider hot events rather than individual days. Here a hot event is defined as one or more days in which the temperature reaches at least 100F. A single event might consist of a single day or it may consist of several consecutive days.

Determine hot events.

# Flag the 100F+ days.  A missing high counts as "not hot": left as NA it
# would become its own run in rle() with an NA value, and the NA would then
# break the lengths[values] / rep() steps below.
isHot = !is.na(TLH.df$TmaxF) & TLH.df$TmaxF >= 100
# Runs of consecutive hot days; each TRUE run is one hot event.
hotEvents = rle(isHot)
eventLength = hotEvents$lengths[hotEvents$values]
# Event number repeated once per day of the event.  seq_along() keeps this
# valid (empty) when there are no hot events; 1:length() would give c(1, 0).
eventNo = rep(seq_along(eventLength), eventLength)
# One row per hot day, in the same order as the runs above.
Events.df = TLH.df[isHot, ]
Events.df$eventNo = eventNo

Determine the number of days between successive 100+F days. Add this as another column.

# Gap, in days, between each 100F+ day and the previous one.
t1 = Events.df$Date[-length(Events.df$Date)]  # every hot day but the last
t2 = Events.df$Date[-1]                       # every hot day but the first
dd = difftime(t2, t1, units = "days")
# The first hot day has no predecessor.  Leading with NA makes c() drop the
# difftime class, so dbe is stored as a plain number of days.
Events.df$dbe = c(NA, dd)
# Keep only the columns used later.  select() is namespace-qualified because
# MASS, attached further down for glm.nb(), masks dplyr's select(); an
# unqualified call fails when this chunk is re-run in the same session.
Events.df = Events.df %>% 
  dplyr::select(TmaxF, TminF, Date, Year, Month, eventNo, dbe)

Length and intensity of events.

# One row per hot event: its length in days, mean and maximum daily high,
# the position within the event (1 = first day) of the hottest day, and the
# year of the event's first day.
LI.df <- summarise(
  group_by(Events.df, eventNo),
  eventLength = n(),
  avgEventT = mean(TmaxF),
  maxEventT = max(TmaxF),
  whenMaxEvT = which.max(TmaxF),
  Year = Year[1L]
)

# Correlation between an event's length and its peak temperature.
cor(x = LI.df$eventLength, y = LI.df$maxEventT)
## [1] 0.5202
# Day of the event on which the peak occurs against event length, with a
# least-squares line.
ggplot(data = LI.df, mapping = aes(x = eventLength, y = whenMaxEvT)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_continuous(breaks = 1:7) +
  labs(x = "Event Length (days)", y = "Day of Event When Maximum Occurs") +
  theme_bw()

plot of chunk unnamed-chunk-15

# Per year: number of hot events and their mean length.  Years without any
# event have no row here.
LI.df2 <- summarise(
  group_by(LI.df, Year),
  count = n(),
  avgEL = mean(eventLength)
)

# Mean event length by year.
ggplot(data = LI.df2, mapping = aes(x = Year, y = avgEL)) +
  geom_point() +
  labs(y = "Average Event Length (days)") +
  theme_bw()

plot of chunk unnamed-chunk-16

# Left-join the yearly event counts onto the full 1950-2013 year range so
# that years without a hot event are present, then set their count to zero.
AllYears <- data.frame(Year = 1950:2013)
LI.df3 <- merge(x = AllYears, y = LI.df2, by = "Year", all.x = TRUE)
LI.df3$count <- replace(LI.df3$count, is.na(LI.df3$count), 0)

# MASS provides glm.nb(), the negative binomial GLM used for the smoother.
suppressMessages(library(MASS))

# Hot events per year as bars, overlaid with a negative binomial trend and
# its standard-error band.
ggplot(data = LI.df3, mapping = aes(x = Year, y = count)) +
    geom_bar(stat = "identity") +
    stat_smooth(data = LI.df3,
                method = "glm.nb",
                formula = y ~ x,
                se = TRUE) +
    scale_x_continuous(breaks = seq(1950, 2013, 10)) +
    labs(y = "Number of Hot Events in Tallahassee, FL") +
    theme_bw()

plot of chunk unnamed-chunk-16

# Variance-to-mean ratio of the yearly event counts; a value above 1 means
# the counts are more variable than a Poisson model would allow.
with(LI.df3, var(count) / mean(count))
## [1] 2.749
# Negative binomial regression of the yearly event count on Year.
summary(glm.nb(formula = count ~ Year, data = LI.df3))
## 
## Call:
## glm.nb(formula = count ~ Year, data = LI.df3, init.theta = 0.6874223114, 
##     link = log)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.332  -1.019  -0.821   0.356   1.841  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -53.1336    22.8683   -2.32     0.02 *
## Year          0.0267     0.0115    2.32     0.02 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.6874) family taken to be 1)
## 
##     Null deviance: 62.360  on 63  degrees of freedom
## Residual deviance: 56.188  on 62  degrees of freedom
## AIC: 165
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.687 
##           Std. Err.:  0.292 
## 
##  2 x log-likelihood:  -159.030
# Length of every hot event against the year it started; the points are
# semi-transparent so that overlapping events show up darker.
ggplot(data = LI.df, mapping = aes(x = Year, y = eventLength)) +
  geom_point(size = 3, alpha = .5) +
  scale_x_continuous(breaks = seq(1950, 2013, 10)) +
  scale_y_continuous(breaks = 1:8) +
  labs(y = "Length of Hot Event in Tallahassee, FL (days)") +
#  geom_quantile(quantile = .95) +
  theme_bw()

plot of chunk unnamed-chunk-17