The decreasing population bias in tornado reports over the central Plains

James B. Elsner, Laura E. Michaels, Kelsey N. Scheitlin, Ian J. Elsner

Code in support of our paper in review with the American Meteorological Society's journal Weather, Climate and Society.

date()

## [1] "Thu Feb 14 21:19:28 2013"

Preliminaries

Set working directory, load packages, and read bibtex file.

setwd("~/Dropbox/Tornadoes")
require(maptools)
require(maps)
require(ggplot2)
require(plyr)
require(evaluate)
require(reshape)
require(ggmap)
require(rgdal)
require(spatstat)
require(grid)
require(mapproj)
require(knitcitations)
require(bibtex)
require(reshape)
require(colorRamps)
require(raster)
require(lattice)

Tornado data

Download the tornado data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/

tmp = download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip", 
    "tornado.zip", mode = "wb")

Read data.

unzip("tornado.zip")
Torn = readOGR(dsn = ".", layer = "tornado")

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions

names(Torn)

##  [1] "OM"         "YEAR"       "MONTH"      "DAY"        "DATE"      
##  [6] "TIME"       "TIMEZONE"   "STATE"      "FIPS"       "STATENUMBE"
## [11] "FSCALE"     "INJURIES"   "FATALITIES" "LOSS"       "CROPLOSS"  
## [16] "SLAT"       "SLON"       "ELAT"       "ELON"       "LENGTH"    
## [21] "WIDTH"      "NS"         "SN"         "SG"         "F1"        
## [26] "F2"         "F3"         "F4"

proj4string(Torn)

## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"

The number of tornado reports in the data set is 56221. Here we consider tornadoes only within a region centered on Russell KS.

Obtain a google map centered on Russell KS with a zoom of 7. Subset the tornado reports by map domain.

RussellKS = geocode("Russell KS")
Map = get_map(location = unlist(RussellKS), maptype = "roadmap", zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, SLAT <= ula & SLAT >= lla & SLON <= ulo & SLON >= llo)

The map shows touchdown locations of 6328 tornadoes.

Create a bar chart of tornado frequency by month type

p = ggplot(Torn2@data, aes(as.factor(MONTH))) + geom_bar()
p + xlab("Month") + ylab("Tornado Reports") + theme_bw()

plot of chunk barchartmonthlyfrequency

FIGURE 1

Create a map of the May/June tornadoes centered on Russell, KS.

Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
p = ggmap(Map, extent = "device") + geom_point(aes(x = SLON, y = SLAT), data = Torn3@data, 
    size = 1.2, color = "red") + ylab("") + xlab("")
p

plot of chunk mapMayJune

FIGURE 2

There are some years with no F3, F4, or F5 tornadoes. With .drop=FALSE these years include 0 for these F scale categories.

trpy = ddply(Torn3@data, .(YEAR, FSCALE), .drop = FALSE, "nrow")
trpy$Fscale = paste("F", trpy$FSCALE, sep = "")

Plot the tornado reports by F scale and year using faceting on the F scale variable. Faceting refers to multiple plots of the same variables conditioned on another variable. By default the scales are kept the same in each panel, but here we allow the scales to vary as specified by the scales argument in the facet wrap function.

trpy04 = trpy[trpy$FSCALE >= 0 & trpy$FSCALE <= 3, ]
p = ggplot(trpy04, aes(x = YEAR, y = nrow)) + geom_point() + facet_wrap(~Fscale, 
    scales = "free_y")
p = p + geom_smooth(method = "loess", span = 0.9, color = "black") + theme_bw()
p + ylab("Tornado Reports") + xlab("")

plot of chunk timeSeriesPlot

FIGURE 3

Tornado touchdown reports as spatial point pattern data

We examine the population bias in the tornado reports using functions in the spatstat package Baddeley & Turner, 2005.

Now we create a marked planar point pattern (ppp) where the marks are the year. We specify the unit name and apply a summary method.

Torn3.ppp = as(Torn3["YEAR"], "ppp")
unitname(Torn3.ppp) = c("meter", "meters")
summary(Torn3.ppp)

## Marked planar point pattern: 3879 points
## Average intensity 1.01e-08 points per square meter  
## 
## *Pattern contains duplicated points*
## 
## Coordinates are given to 2 decimal places 
## i.e. rounded to the nearest multiple of 0.01 meters  
## 
## marks are  numeric,  of type 'integer' 
## Summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1950    1970    1990    1990    2000    2010 
## 
## Window: rectangle = [-564770, 57800] x [-318285, 299106] meters 
## Window area =  3.84369e+11 square meters 
## Unit of length: 1 meter

The window area is 3.8437 × 10⁵ square km. There are 3879 touchdowns in the data set. Not all locations are unique. The average density is 1.0092 × 10^-8

Spatial density

Here we use an adaptive spatial density estimator that uses a fraction f of the data to construct a Dirichlet tessellation and the forms an density estimate that is constant in each tile of the tessellation. A randomization of the fraction chosen creates a somewhat different density estimate which are subsequently averaged.

den = density(Torn3.ppp, adjust = 0.25)
den$v = den$v * 1e+08
den.sgdf = as(den, "SpatialGridDataFrame")
p = spplot(den.sgdf, "v", col.regions = matlab.like(20), colorkey = list(space = "bottom", 
    labels = list(cex = 0.7)), sub = list("Tornado Reports/100 square km", cex = 1, 
    font = 1))

Are reports less numerous away from cities & towns?

To get city locations download the zip file from http://www.nws.noaa.gov/geodata/catalog/national/html/cities.htm. Read the data and subset within our map tile.

UScities = read.dbf("ci18my12.dbf")
Mapcities = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >= llo & 
    POP_1990 > 1000)
dim(Mapcities)[1]

## [1] 315

Here we get 315 cities with populations ranging from 1001 to 367302 residents. The POP_1990 attribute refers to the 1990 Census according to the metadata.

The POPULATION attribute is the 2000 Census value as a factor. To create an integer type

x = UScities$POPULATION
y = as.numeric(levels(x))[as.integer(x)]
sum(y < 1000)/length(y)
Mapcities2000 = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >= 
    llo & y > 1000)
dim(Mapcities2000)[1]
Mapcities = Mapcities2000

Create a projected ppp object of the city locations.

ll = "+proj=longlat +datum=NAD83"
Mapcities.sdf = Mapcities
coordinates(Mapcities.sdf) = ~LON + LAT
proj4string(Mapcities.sdf) = CRS(ll)
Mapcities.sdf = spTransform(Mapcities.sdf, CRS(proj4string(Torn3)))
Mapcities.ppp = as.ppp(coordinates(Mapcities.sdf), W = as.owin(Torn3.ppp))

## Warning: 2 points were rejected as lying outside the specified window

unitname(Mapcities.ppp) = c("meter", "meters")

Overlay a map of the city centers with the tornado report density.

cities = coords(Mapcities.ppp)
l1 = list("sp.points", cities, col = "black", pch = 16)
p = spplot(den.sgdf, "v", col.regions = matlab.like(20), sp.layout = list(l1), 
    colorkey = list(space = "bottom", labels = list(cex = 1)), sub = list(expression(paste("Tornado Reports/100 km", 
        {
        }^2)), cex = 1, font = 1))
p

plot of chunk densityCityoverlay

FIGURE 4

A city does not guarantee a concentration of tornado reports, but in regions without cities there are no significant report concentrations.

Create a map indicating the distance to the nearest city and then estimate the tornado density as a function of this distance. The method and bandwidth (bw) are selected based on trial and error.

Z = distmap(Mapcities.ppp)
m2km = 0.001
Z$v = Z$v * m2km
quantile(Z$v)

##       0%      25%      50%      75%     100% 
##  0.07971 10.73306 16.91444 23.98462 73.77729

dist.sgdf = as(Z, "SpatialGridDataFrame")
p1 = spplot(dist.sgdf, "v", col.regions = rev(cm.colors(7)), at = seq(0, 70, 
    10), colorkey = list(space = "bottom", labels = list(cex = 1)), sub = list("Distance From Nearest City Center (km)", 
    cex = 1, font = 1))
p1

plot of chunk distanceMap

x = as.vector(Z$v)
p2 = histogram(x, xlab = "Distance From Nearest City Center (km)", aspect = "xy", 
    endpoints = c(0, 75), nint = 14, col = "light gray", cex = 1, font = 1)
p2 = update(p2, main = textGrob("b", x = unit(0.05, "npc")), par.settings = list(fontsize = list(text = 12)))

Distance from nearest city center model.

Z = distmap(Mapcities.ppp)
rhat2 = rhohat(unmark(Torn3.ppp), Z, bw = 0.25, method = "transform")
# plot(rhohat(unmark(Torn3.ppp), Z, bw=.5, method='transform'))

Create a data frame from the output of the rhohat function that can be used with ggplot.

m2km = 0.001
km2 = 1e+08  # 100 square km
dist2 = rhat2$Z * m2km
rho2 = rhat2$rho * km2
hi2 = rhat2$hi * km2
lo2 = rhat2$lo * km2
Rhowide.df = data.frame(dist2 = dist2, rho2 = rho2, hi2 = hi2, lo2 = lo2)

Plot the tornado density in number per 100 square km as a function of distance from city.

p1 = ggplot(Rhowide.df) + geom_ribbon(aes(x = dist2, ymin = lo2, ymax = hi2), 
    alpha = 0.3) + geom_line(aes(x = dist2, y = rho2), col = "black") + scale_y_continuous(limits = c(0.7, 
    1.5)) + scale_x_continuous(limits = c(0, 60)) + ylab(expression(paste("Tornado Reports/100 km", 
    {
    }^2))) + xlab("Distance From Nearest City Center (km)") + theme_bw()
p1

## Warning: Removed 96 rows containing missing values (geom_path).

plot of chunk Rhoplot

FIGURE 6 Tornado density shows a peak above 1.35 per 100 square kilometers at the city center with a decrease in density at larger radii. The difference between using the raw data and the declustered data is small relative to the population bias (not shown). The percentage by which the reports near cities and towns exceeds that of the countryside is

(rhat2$lo[1]/rhat2$hi[length(rhat2$rho)] - 1) * 100

## [1] 64.94

(rhat2$rho[1]/rhat2$rho[length(rhat2$rho)] - 1) * 100

## [1] 81.49

(rhat2$hi[1]/rhat2$lo[length(rhat2$rho)] - 1) * 100

## [1] 99.91


rhat2$lo[1]/rhat2$hi[length(rhat2$rho)]

## [1] 1.649

rhat2$rho[1]/rhat2$rho[length(rhat2$rho)]

## [1] 1.815

rhat2$hi[1]/rhat2$lo[length(rhat2$rho)]

## [1] 1.999

Are tornado reports more numerous at random locations?

set.seed(550342)
Randomlocations.ppp = runifpoint(n = Mapcities.ppp$n, win = Mapcities.ppp$window)
Z2 = distmap(Randomlocations.ppp)
rhat3 = rhohat(unmark(Torn3.ppp), Z2, bw = 0.25, method = "transform")
dist3 = rhat3$Z * m2km
rho3 = rhat3$rho * km2
hi3 = rhat3$hi * km2
lo3 = rhat3$lo * km2
Rhowide2.df = data.frame(dist3 = dist3, rho3 = rho3, hi3 = hi3, lo3 = lo3)
p2 = ggplot(Rhowide2.df) + geom_ribbon(aes(x = dist3, ymin = lo3, ymax = hi3), 
    alpha = 0.3) + geom_line(aes(x = dist3, y = rho3), col = "black") + scale_y_continuous(limits = c(0.7, 
    1.5)) + scale_x_continuous(limits = c(0, 60)) + ylab(expression(paste("Tornado Reports/100 km", 
    {
    }^2))) + xlab("Distance From Nearest Random Location (km)") + theme_bw()
p2

## Warning: Removed 11 rows containing missing values (geom_path).

plot of chunk randomLocations

FIGURE 7

Are tornado reports more numerous near radar sites?

Read the data and subset within our map tile.

Radar = read.table("RadarSites.txt", header = TRUE)

Here we get 8 radar sites.

Create a projected ppp object of the radar locations.

Radar.sdf = Radar
coordinates(Radar.sdf) = ~lon + lat
proj4string(Radar.sdf) = CRS(ll)
Radar.sdf = spTransform(Radar.sdf, CRS(proj4string(Torn3)))
Radar.ppp = as.ppp(coordinates(Radar.sdf), W = as.owin(Torn3.ppp))
unitname(Radar.ppp) = c("meter", "meters")
Z2 = distmap(Radar.ppp)
rhat3 = rhohat(unmark(Torn3.ppp), Z2, bw = 0.25, method = "transform")
dist3 = rhat3$Z * m2km
rho3 = rhat3$rho * km2
hi3 = rhat3$hi * km2
lo3 = rhat3$lo * km2
Rhowide2.df = data.frame(dist3 = dist3, rho3 = rho3, hi3 = hi3, lo3 = lo3)
p2 = ggplot(Rhowide2.df) + geom_ribbon(aes(x = dist3, ymin = lo3, ymax = hi3), 
    alpha = 0.3) + geom_line(aes(x = dist3, y = rho3), col = "red") + scale_y_continuous(limits = c(0.75, 
    1.42)) + ylab(expression(paste("(Tornado Reports/100 km", {
}^2, ")"))) + xlab("Distance From Nearest\n NWS WSR88D Radar Site (km)") + 
    theme_bw()
p2

plot of chunk projectedRadarLocationsPPP

rhat3$lo[1]/rhat3$hi[length(rhat3$rho)]

## [1] 1.191

rhat3$rho[1]/rhat3$rho[length(rhat3$rho)]

## [1] 1.309

rhat3$hi[1]/rhat3$lo[length(rhat3$rho)]

## [1] 1.439

Population bias now vs then

The population bias noted above is likely most severe earlier in time. To test this we repeat the analysis using different time windows.

m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1970
endYear = 2011
DT = 10
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3.sdf = Torn3
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
for (yr in seq(beginYear, endYear, 1)) {
    Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
    rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
    dist = c(dist, rhat$Z * m2km)
    rho = c(rho, rhat$rho * km2)
    rhoCity = c(rhoCity, rhat$rho[1] * km2)
    rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
    rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
    rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
    rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
    rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
    ratio = c(ratio, (rhat$rho[1]/rhat$rho[length(rhat$rho)] - 1) * 100)
    hi = c(hi, rhat$hi * km2)
    lo = c(lo, rhat$lo * km2)
    ratiohi = c(ratiohi, (rhat$hi[1]/rhat$hi[length(rhat$hi)] - 1) * 100)
    ratiolo = c(ratiolo, (rhat$lo[1]/rhat$lo[length(rhat$lo)] - 1) * 100)
    Year = c(Year, rep(yr, length(rhat$Z)))
}
Rhowide2.df = data.frame(Year = factor(Year), dist = dist, rho = rho, hi = hi, 
    lo = lo)
Ratio.df = data.frame(Year = beginYear:endYear, ratio = ratio, ratioU = ratiohi, 
    ratioL = ratiolo, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi, 
    rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo)

xx = Year
yy = xx - 9
Years = paste(yy, "-", xx, sep = "")
Rhowide2.df$Years = Years

Plot the number density as a function of distance from the city center in 10-year intervals.

pp = ggplot(Rhowide2.df) + geom_line(aes(x = dist, y = rho)) + facet_wrap(~Years) + 
    geom_ribbon(aes(x = dist, ymin = lo, ymax = hi), alpha = 0.2) + ylab(expression(paste("Tornado Reports/100 km", 
    {
    }^2, " per Decade"))) + xlab("Distance From Nearest City Center (km)") + 
    theme_bw()
pp

plot of chunk Rhoplot2

FIGURE 8

Time trends of tornado density in and out of town.

RatioLong.df = melt(Ratio.df, id.vars = "Year", measure.vars = c("rhoCity", 
    "rhoCountry"))
levels(RatioLong.df$variable)[levels(RatioLong.df$variable) == "rhoCity"] = "City"
levels(RatioLong.df$variable)[levels(RatioLong.df$variable) == "rhoCountry"] = "Country"
names(RatioLong.df)[names(RatioLong.df) == "variable"] = "Location"
ggplot(RatioLong.df, aes(x = Year, y = value, group = Location)) + geom_point(aes(color = Location)) + 
    geom_smooth() + ylab(expression(paste("Tornado Reports/100 km", {
}^2, " per Decade)"))) + scale_x_continuous(breaks = seq(1970, 2010, 10), labels = c("1961-1970", 
    "1971-1980", "1981-1990", "1991-2000", "2001-2010")) + xlab("10-year Interval") + 
    theme_bw() + theme(legend.justification = c(0, 1), legend.position = c(0, 
    1))

## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.

plot of chunk trends

FIGURE 9

  1970    2011

F1+ 140.0 23.85 F0+ 149.2 12.00

Plot the percentage by which tornado reports near cities and towns exceed that in the countryside with uncertainty lines on the estimates.

pp = ggplot(Ratio.df, aes(x = Year, y = ratio)) + geom_point() + geom_smooth(span = 0.75, 
    color = "black")
pp = pp + geom_linerange(aes(ymax = Ratio.df$ratioU, ymin = Ratio.df$ratioL))
pp = pp + scale_x_continuous(breaks = seq(1970, 2010, 10), labels = c("1961-1970", 
    "1971-1980", "1981-1990", "1991-2000", "2001-2010"))
pp = pp + xlab("10-year Interval") + theme_bw()
pp + ylab("Percentage by which Tornado Reports near Cities\n Exceeds those in the Country")

## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.

plot of chunk ratioPlot

FIGURE 10

F0, F1, F2+

Repeat the analysis using subsets of the database, F0 only, F1 only, and F2+.

Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3 = subset(Torn3, FSCALE == 0)
Torn3.sdf = Torn3
Torn3.ppp = as(Torn3.sdf["YEAR"], "ppp")
unitname(Torn3.ppp) = c("meter", "meters")
rhat2 = rhohat(unmark(Torn3.ppp), Z, bw = 0.25, method = "transform")
(rhat2$rho[1]/rhat2$rho[length(rhat2$rho)] - 1) * 100

## [1] 67.48

(rhat2$hi[1]/rhat2$lo[length(rhat2$rho)] - 1) * 100

## [1] 91.06

(rhat2$lo[1]/rhat2$hi[length(rhat2$rho)] - 1) * 100

## [1] 47.05

m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1970
endYear = 2011
DT = 10
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE == 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
for (yr in seq(beginYear, endYear, 1)) {
    Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
    rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
    dist = c(dist, rhat$Z * m2km)
    rho = c(rho, rhat$rho * km2)
    rhoCity = c(rhoCity, rhat$rho[1] * km2)
    rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
    rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
    rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
    rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
    rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
    ratio = c(ratio, (rhat$rho[1]/rhat$rho[length(rhat$rho)] - 1) * 100)
    hi = c(hi, rhat$hi * km2)
    lo = c(lo, rhat$lo * km2)
    ratiohi = c(ratiohi, (rhat$hi[1]/rhat$hi[length(rhat$hi)] - 1) * 100)
    ratiolo = c(ratiolo, (rhat$lo[1]/rhat$lo[length(rhat$lo)] - 1) * 100)
    Year = c(Year, rep(yr, length(rhat$Z)))
}
Rhowide2.df = data.frame(Year = factor(Year), dist = dist, rho = rho, hi = hi, 
    lo = lo)
xx = Year
yy = xx - 9
Years = paste(yy, "-", xx, sep = "")
Rhowide2.df$Years = Years
pp = ggplot(Rhowide2.df) + geom_line(aes(x = dist, y = rho)) + facet_wrap(~Years) + 
    geom_ribbon(aes(x = dist, ymin = lo, ymax = hi), alpha = 0.2) + ylab(expression(paste("Tornado Reports/100 km", 
    {
    }^2, " per Decade"))) + xlab("Distance From Nearest City Center (km)") + 
    theme_bw()
pp

plot of chunk ratioF0

FIGURE 11

Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3 = subset(Torn3, FSCALE == 1)
Torn3.sdf = Torn3
Torn3.ppp = as(Torn3.sdf["YEAR"], "ppp")
unitname(Torn3.ppp) = c("meter", "meters")
rhat2 = rhohat(unmark(Torn3.ppp), Z, bw = 0.25, method = "transform")
(rhat2$rho[1]/rhat2$rho[length(rhat2$rho)] - 1) * 100

## [1] 97.46

(rhat2$hi[1]/rhat2$lo[length(rhat2$rho)] - 1) * 100

## [1] 140.2

(rhat2$lo[1]/rhat2$hi[length(rhat2$rho)] - 1) * 100

## [1] 63.03

m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1970
endYear = 2011
DT = 10
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE == 1, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
for (yr in seq(beginYear, endYear, 1)) {
    Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
    rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
    dist = c(dist, rhat$Z * m2km)
    rho = c(rho, rhat$rho * km2)
    rhoCity = c(rhoCity, rhat$rho[1] * km2)
    rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
    rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
    rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
    rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
    rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
    ratio = c(ratio, (rhat$rho[1]/rhat$rho[length(rhat$rho)] - 1) * 100)
    hi = c(hi, rhat$hi * km2)
    lo = c(lo, rhat$lo * km2)
    ratiohi = c(ratiohi, (rhat$hi[1]/rhat$hi[length(rhat$hi)] - 1) * 100)
    ratiolo = c(ratiolo, (rhat$lo[1]/rhat$lo[length(rhat$lo)] - 1) * 100)
    Year = c(Year, rep(yr, length(rhat$Z)))
}
Rhowide2.df = data.frame(Year = factor(Year), dist = dist, rho = rho, hi = hi, 
    lo = lo)
xx = Year
yy = xx - 9
Years = paste(yy, "-", xx, sep = "")
Rhowide2.df$Years = Years
pp = ggplot(Rhowide2.df) + geom_line(aes(x = dist, y = rho)) + facet_wrap(~Years) + 
    geom_ribbon(aes(x = dist, ymin = lo, ymax = hi), alpha = 0.2) + ylab(expression(paste("Tornado Reports/100 km", 
    {
    }^2, " per Decade"))) + xlab("Distance From Nearest City Center (km)") + 
    theme_bw()
pp

plot of chunk ratioF1

FIGURE 12

Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3 = subset(Torn3, FSCALE > 1)
Torn3.sdf = Torn3
Torn3.ppp = as(Torn3.sdf["YEAR"], "ppp")
unitname(Torn3.ppp) = c("meter", "meters")
rhat2 = rhohat(unmark(Torn3.ppp), Z, bw = 0.25, method = "transform")
(rhat2$rho[1]/rhat2$rho[length(rhat2$rho)] - 1) * 100

## [1] 92.64

(rhat2$hi[1]/rhat2$lo[length(rhat2$rho)] - 1) * 100

## [1] 147.9

(rhat2$lo[1]/rhat2$hi[length(rhat2$rho)] - 1) * 100

## [1] 50.84

m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1970
endYear = 2011
DT = 10
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 1, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
for (yr in seq(beginYear, endYear, 1)) {
    Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
    rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
    dist = c(dist, rhat$Z * m2km)
    rho = c(rho, rhat$rho * km2)
    rhoCity = c(rhoCity, rhat$rho[1] * km2)
    rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
    rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
    rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
    rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
    rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
    ratio = c(ratio, (rhat$rho[1]/rhat$rho[length(rhat$rho)] - 1) * 100)
    hi = c(hi, rhat$hi * km2)
    lo = c(lo, rhat$lo * km2)
    ratiohi = c(ratiohi, (rhat$hi[1]/rhat$hi[length(rhat$hi)] - 1) * 100)
    ratiolo = c(ratiolo, (rhat$lo[1]/rhat$lo[length(rhat$lo)] - 1) * 100)
    Year = c(Year, rep(yr, length(rhat$Z)))
}
Rhowide2.df = data.frame(Year = factor(Year), dist = dist, rho = rho, hi = hi, 
    lo = lo)
xx = Year
yy = xx - 9
Years = paste(yy, "-", xx, sep = "")
Rhowide2.df$Years = Years
pp = ggplot(Rhowide2.df) + geom_line(aes(x = dist, y = rho)) + facet_wrap(~Years) + 
    geom_ribbon(aes(x = dist, ymin = lo, ymax = hi), alpha = 0.2) + ylab(expression(paste("Tornado Reports/100 km", 
    {
    }^2, " per Decade"))) + xlab("Distance From Nearest City Center (km)") + 
    theme_bw()
pp

plot of chunk ratioF2plus

FIGURE 13

More storm chasers

A plausible explanation for this decline in population bias is the increasing surviellance of tornadoes by storm spotters and chasers. Storm chasing has increased in popularity and the number of chasers that roam tornado alley is on the rise.

Files containing number of newspaper articles that mention the phrase 'storm chaser' (SC) and 'tornado chaser' on newspaperarchive.com since 1980. The file is PaperChase.csv.

NPmentions = read.csv("PaperChase.csv", header = TRUE)
Gmentions = read.csv("GoogleChase.csv", header = TRUE)

Moving average function.

ma = function(x, n = 10) {
    filter(x, rep(1/n, n), sides = 1)
}

Storm chase in newspapers per year.

GC = as.vector(ma(Gmentions$Chaser))
GC = GC[10:31]
GC.df = data.frame(Year = 1989:2010, GC = GC)
TC = as.vector(ma(NPmentions$TC[-length(NPmentions$TC)]))
TC.df = data.frame(Year = 1980:2010, TC = TC)

Plot the number of Google pages mentioning storm chasers.

ggplot(GC.df, aes(x = Year, y = GC)) + geom_point() + geom_smooth(span = 0.5) + 
    scale_y_reverse() + ylab("Ten-year Avg Google Pages\n axis reversed") + 
    xlab("Ten-year interval") + theme_bw() + scale_x_continuous(breaks = seq(1990, 
    2010, 5), labels = c("1981-1990", "1986-1995", "1991-2000", "1996-2005", 
    "2001-2010"))

Merged = merge(Ratio.df, GC.df, "Year")
Merged3 = subset(Merged, Year >= 1996)
summary(lm(rhoCountry ~ Year + GC, data = Merged3))

## 
## Call:
## lm(formula = rhoCountry ~ Year + GC, data = Merged3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.00895 -0.00403 -0.00112  0.00197  0.01483 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.516210   2.263651   -0.23     0.82    
## Year         0.000320   0.001133    0.28     0.78    
## GC           0.006435   0.000925    6.96  1.5e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.00737 on 12 degrees of freedom
## Multiple R-squared: 0.966,   Adjusted R-squared: 0.961 
## F-statistic:  172 on 2 and 12 DF,  p-value: 1.45e-09

summary(lm(rhoCity ~ Year + GC, data = Merged3))

## 
## Call:
## lm(formula = rhoCity ~ Year + GC, data = Merged3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.02496 -0.01283 -0.00379  0.01243  0.03764 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 17.67076    5.73059    3.08   0.0095 **
## Year        -0.00871    0.00287   -3.04   0.0103 * 
## GC           0.00764    0.00234    3.26   0.0068 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.0187 on 12 degrees of freedom
## Multiple R-squared: 0.47,    Adjusted R-squared: 0.382 
## F-statistic: 5.32 on 2 and 12 DF,  p-value: 0.0221

Long3 = melt(Merged3, id = c("GC"), measure = c("rhoCity", "rhoCountry"))
levels(Long3$variable)[levels(Long3$variable) == "rhoCity"] = "City"
levels(Long3$variable)[levels(Long3$variable) == "rhoCountry"] = "Country"
Long4 = melt(Merged3, id = c("GC"), measure = c("rhoCitylo", "rhoCountrylo"))
Long5 = melt(Merged3, id = c("GC"), measure = c("rhoCityhi", "rhoCountryhi"))
Final = data.frame(Long3, lo = Long4[, 3], hi = Long5[, 3])
pp = ggplot(Final, aes(x = GC, y = value)) + geom_point() + geom_smooth(color = "black") + 
    facet_wrap(~variable) + geom_linerange(aes(ymax = hi, ymin = lo)) + theme_bw() + 
    xlab(expression(paste("Google News", {
    }^TM, " Mentions of 'Storm Chaser'"))) + ylab(expression(paste("Tornado Reports/100 km", 
    {
    }^2)))

Create a two panel plot.

pp

## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.

## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.

plot of chunk tornadoReportsChasers

FIGURE 14

pp = ggplot(Merged3, aes(x = GC, y = rhoCountry)) + geom_point() + geom_smooth()
pp = pp + geom_linerange(aes(ymax = rhoCountryhi, ymin = rhoCountrylo))
pp = pp + xlab(expression(paste("Google News", {
}^TM, " Mentions of 'Storm Chaser'")))
p1 = pp + ylab(expression(paste("County Reports/100 km", {
}^2))) + theme_bw()

pp = ggplot(Merged3, aes(x = GC, y = rhoCity)) + geom_point() + geom_smooth()
pp = pp + geom_linerange(aes(ymax = rhoCityhi, ymin = rhoCitylo))
pp = pp + xlab(expression(paste("Google News", {
}^TM, " Mentions of 'Storm Chaser'")))
p2 = pp + ylab(expression(paste("City Reports/100 km", {
}^2))) + theme_bw()

Annual population bias

RussellKS = geocode("Russell KS")
allpop = 1000
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
nt = numeric()
beginYear = 1990
endYear = 2011
DT = 1
Map = get_map(location = unlist(RussellKS), maptype = "roadmap", zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, SLAT <= ula & SLAT >= lla & SLON <= ulo & SLON >= llo)
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3.sdf = Torn3
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
Mapcities = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >= llo & 
    POP_1990 > allpop)
dim(Mapcities)[1]

## [1] 315

Mapcities.sdf = Mapcities
coordinates(Mapcities.sdf) = ~LON + LAT
proj4string(Mapcities.sdf) = CRS(ll)
Mapcities.sdf = spTransform(Mapcities.sdf, CRS(proj4string(Torn3)))
Mapcities.ppp = as.ppp(coordinates(Mapcities.sdf), W = as.owin(TornA.ppp))
unitname(Mapcities.ppp) = c("meter", "meters")
Z = distmap(Mapcities.ppp)
for (yr in seq(beginYear, endYear, 1)) {
    Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
    nt = c(nt, Torn4.ppp$n)
    rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
    dist = c(dist, rhat$Z * m2km)
    rho = c(rho, rhat$rho * km2)
    rhoCity = c(rhoCity, rhat$rho[1] * km2)
    rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
    rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
    rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
    rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
    rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
    ratio = c(ratio, rhat$rho[1]/rhat$rho[length(rhat$rho)])
    hi = c(hi, rhat$hi * km2)
    lo = c(lo, rhat$lo * km2)
    ratiohi = c(ratiohi, rhat$hi[1]/rhat$hi[length(rhat$hi)])
    ratiolo = c(ratiolo, rhat$lo[1]/rhat$lo[length(rhat$lo)])
    Year = c(Year, rep(yr, length(rhat$Z)))
}
Region = rep("Central Plains", length(beginYear:endYear))
R1.df = data.frame(Year = beginYear:endYear, nt = nt, ratio = ratio, ratioU = ratiohi, 
    ratioL = ratiolo, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi, 
    rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo, 
    Region = Region)
# ggplot(R1.df, aes(Year, ratio)) + geom_point() +
# geom_smooth(method='lm') + theme_bw()

Regional changes

Annual percentage (factor) by which city density exceeds country density in the South and Midwest.

ArmoryMS = geocode("Armory MS")
allpop = 1000
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
nt = numeric()
beginYear = 1990
endYear = 2011
DT = 1
Map = get_map(location = unlist(ArmoryMS), maptype = "roadmap", zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, SLAT <= ula & SLAT >= lla & SLON <= ulo & SLON >= llo)
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3.sdf = Torn3
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
Mapcities = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >= llo & 
    POP_1990 > allpop)
dim(Mapcities)[1]

## [1] 964

Mapcities.sdf = Mapcities
coordinates(Mapcities.sdf) = ~LON + LAT
proj4string(Mapcities.sdf) = CRS(ll)
Mapcities.sdf = spTransform(Mapcities.sdf, CRS(proj4string(Torn3)))
Mapcities.ppp = as.ppp(coordinates(Mapcities.sdf), W = as.owin(TornA.ppp))

## Warning: 4 points were rejected as lying outside the specified window

## Warning: data contain duplicated points

unitname(Mapcities.ppp) = c("meter", "meters")
Z = distmap(Mapcities.ppp)
for (yr in seq(beginYear, endYear, 1)) {
    Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
    nt = c(nt, Torn4.ppp$n)
    rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
    dist = c(dist, rhat$Z * m2km)
    rho = c(rho, rhat$rho * km2)
    rhoCity = c(rhoCity, rhat$rho[1] * km2)
    rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
    rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
    rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
    rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
    rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
    ratio = c(ratio, rhat$rho[1]/rhat$rho[length(rhat$rho)])
    hi = c(hi, rhat$hi * km2)
    lo = c(lo, rhat$lo * km2)
    ratiohi = c(ratiohi, rhat$hi[1]/rhat$hi[length(rhat$hi)])
    ratiolo = c(ratiolo, rhat$lo[1]/rhat$lo[length(rhat$lo)])
    Year = c(Year, rep(yr, length(rhat$Z)))
}
Region = rep("South", length(beginYear:endYear))
R2.df = data.frame(Year = beginYear:endYear, nt = nt, ratio = ratio, ratioU = ratiohi, 
    ratioL = ratiolo, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi, 
    rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo, 
    Region = Region)
# ggplot(R2.df[R2.df$nt>=10,], aes(Year, ratio)) + geom_point() +
# geom_smooth(method='lm') + theme_bw()

TerreHauteIN = geocode("Terre Haute IN")
allpop = 1000
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
nt = numeric()
beginYear = 1990
endYear = 2011
DT = 1
Map = get_map(location = unlist(TerreHauteIN), maptype = "roadmap", zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, SLAT <= ula & SLAT >= lla & SLON <= ulo & SLON >= llo)
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3.sdf = Torn3
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
Mapcities = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >= llo & 
    POP_1990 > allpop)
dim(Mapcities)[1]

## [1] 2053

Mapcities.sdf = Mapcities
coordinates(Mapcities.sdf) = ~LON + LAT
proj4string(Mapcities.sdf) = CRS(ll)
Mapcities.sdf = spTransform(Mapcities.sdf, CRS(proj4string(Torn3)))
Mapcities.ppp = as.ppp(coordinates(Mapcities.sdf), W = as.owin(TornA.ppp))

## Warning: 48 points were rejected as lying outside the specified window

unitname(Mapcities.ppp) = c("meter", "meters")
Z = distmap(Mapcities.ppp)
for (yr in seq(beginYear, endYear, 1)) {
    Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
    nt = c(nt, Torn4.ppp$n)
    rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
    dist = c(dist, rhat$Z * m2km)
    rho = c(rho, rhat$rho * km2)
    rhoCity = c(rhoCity, rhat$rho[1] * km2)
    rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
    rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
    rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
    rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
    rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
    ratio = c(ratio, rhat$rho[1]/rhat$rho[length(rhat$rho)])
    hi = c(hi, rhat$hi * km2)
    lo = c(lo, rhat$lo * km2)
    ratiohi = c(ratiohi, rhat$hi[1]/rhat$hi[length(rhat$hi)])
    ratiolo = c(ratiolo, rhat$lo[1]/rhat$lo[length(rhat$lo)])
    Year = c(Year, rep(yr, length(rhat$Z)))
}
Region = rep("Midwest", length(beginYear:endYear))
R3.df = data.frame(Year = beginYear:endYear, nt = nt, ratio = ratio, ratioU = ratiohi, 
    ratioL = ratiolo, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi, 
    rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo, 
    Region = Region)
ggplot(R3.df[R3.df$nt >= 20, ], aes(Year, ratio)) + geom_point() + geom_smooth(method = "lm") + 
    theme_bw()

plot of chunk RegionalChangesIN

Combine files

Final2 = rbind(R1.df, R2.df, R3.df)
ggplot(Final2[Final2$nt >= 20, ], aes(Year, ratio)) + geom_point() + geom_smooth(method = "lm") + 
    theme_bw() + facet_grid(. ~ Region) + ylab("Factor By Which Tornado Report Density In Cities\n Exceeds That in Rural Areas")

plot of chunk SinglePlot