Code in support of our paper in review with the American Meteorological Society's journal Weather, Climate and Society.
date()
## [1] "Thu Feb 14 21:19:28 2013"
Set working directory, load packages, and read bibtex file.
setwd("~/Dropbox/Tornadoes")
require(maptools)
require(maps)
require(ggplot2)
require(plyr)
require(evaluate)
require(reshape)
require(ggmap)
require(rgdal)
require(spatstat)
require(grid)
require(mapproj)
require(knitcitations)
require(bibtex)
require(reshape)
require(colorRamps)
require(raster)
require(lattice)
Download the tornado data from the Storm Prediction Center (SPC) http://www.spc.noaa.gov/gis/svrgis/
tmp = download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode = "wb")
Read data.
unzip("tornado.zip")
Torn = readOGR(dsn = ".", layer = "tornado")
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tornado"
## with 56221 features and 28 fields
## Feature type: wkbPoint with 2 dimensions
names(Torn)
## [1] "OM" "YEAR" "MONTH" "DAY" "DATE"
## [6] "TIME" "TIMEZONE" "STATE" "FIPS" "STATENUMBE"
## [11] "FSCALE" "INJURIES" "FATALITIES" "LOSS" "CROPLOSS"
## [16] "SLAT" "SLON" "ELAT" "ELON" "LENGTH"
## [21] "WIDTH" "NS" "SN" "SG" "F1"
## [26] "F2" "F3" "F4"
proj4string(Torn)
## [1] "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=39 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
The number of tornado reports in the data set is 56221. Here we consider tornadoes only within a region centered on Russell KS.
Obtain a google map centered on Russell KS with a zoom of 7. Subset the tornado reports by map domain.
RussellKS = geocode("Russell KS")
Map = get_map(location = unlist(RussellKS), maptype = "roadmap", zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, SLAT <= ula & SLAT >= lla & SLON <= ulo & SLON >= llo)
The map shows touchdown locations of 6328 tornadoes.
Create a bar chart of tornado frequency by month type
p = ggplot(Torn2@data, aes(as.factor(MONTH))) + geom_bar()
p + xlab("Month") + ylab("Tornado Reports") + theme_bw()
FIGURE 1
Create a map of the May/June tornadoes centered on Russell, KS.
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
p = ggmap(Map, extent = "device") + geom_point(aes(x = SLON, y = SLAT), data = Torn3@data,
size = 1.2, color = "red") + ylab("") + xlab("")
p
FIGURE 2
There are some years with no F3, F4, or F5 tornadoes. With .drop=FALSE these years include 0 for these F scale categories.
trpy = ddply(Torn3@data, .(YEAR, FSCALE), .drop = FALSE, "nrow")
trpy$Fscale = paste("F", trpy$FSCALE, sep = "")
Plot the tornado reports by F scale and year using faceting on the F scale variable. Faceting refers to multiple plots of the same variables conditioned on another variable. By default the scales are kept the same in each panel, but here we allow the scales to vary as specified by the scales argument in the facet wrap function.
trpy04 = trpy[trpy$FSCALE >= 0 & trpy$FSCALE <= 3, ]
p = ggplot(trpy04, aes(x = YEAR, y = nrow)) + geom_point() + facet_wrap(~Fscale,
scales = "free_y")
p = p + geom_smooth(method = "loess", span = 0.9, color = "black") + theme_bw()
p + ylab("Tornado Reports") + xlab("")
FIGURE 3
We examine the population bias in the tornado reports using functions in the spatstat package Baddeley & Turner, 2005.
Now we create a marked planar point pattern (ppp) where the marks are the year. We specify the unit name and apply a summary method.
Torn3.ppp = as(Torn3["YEAR"], "ppp")
unitname(Torn3.ppp) = c("meter", "meters")
summary(Torn3.ppp)
## Marked planar point pattern: 3879 points
## Average intensity 1.01e-08 points per square meter
##
## *Pattern contains duplicated points*
##
## Coordinates are given to 2 decimal places
## i.e. rounded to the nearest multiple of 0.01 meters
##
## marks are numeric, of type 'integer'
## Summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1950 1970 1990 1990 2000 2010
##
## Window: rectangle = [-564770, 57800] x [-318285, 299106] meters
## Window area = 3.84369e+11 square meters
## Unit of length: 1 meter
The window area is 3.8437 × 105 square km. There are 3879 touchdowns in the data set. Not all locations are unique. The average density is 1.0092 × 10-8
Here we use an adaptive spatial density estimator that uses a fraction f of the data to construct a Dirichlet tessellation and the forms an density estimate that is constant in each tile of the tessellation. A randomization of the fraction chosen creates a somewhat different density estimate which are subsequently averaged.
den = density(Torn3.ppp, adjust = 0.25)
den$v = den$v * 1e+08
den.sgdf = as(den, "SpatialGridDataFrame")
p = spplot(den.sgdf, "v", col.regions = matlab.like(20), colorkey = list(space = "bottom",
labels = list(cex = 0.7)), sub = list("Tornado Reports/100 square km", cex = 1,
font = 1))
To get city locations download the zip file from http://www.nws.noaa.gov/geodata/catalog/national/html/cities.htm. Read the data and subset within our map tile.
UScities = read.dbf("ci18my12.dbf")
Mapcities = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >= llo &
POP_1990 > 1000)
dim(Mapcities)[1]
## [1] 315
Here we get 315 cities with populations ranging from 1001 to 367302 residents. The POP_1990 attribute refers to the 1990 Census according to the metadata.
The POPULATION attribute is the 2000 Census value as a factor. To create an integer type
x = UScities$POPULATION
y = as.numeric(levels(x))[as.integer(x)]
sum(y < 1000)/length(y)
Mapcities2000 = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >=
llo & y > 1000)
dim(Mapcities2000)[1]
Mapcities = Mapcities2000
Create a projected ppp object of the city locations.
ll = "+proj=longlat +datum=NAD83"
Mapcities.sdf = Mapcities
coordinates(Mapcities.sdf) = ~LON + LAT
proj4string(Mapcities.sdf) = CRS(ll)
Mapcities.sdf = spTransform(Mapcities.sdf, CRS(proj4string(Torn3)))
Mapcities.ppp = as.ppp(coordinates(Mapcities.sdf), W = as.owin(Torn3.ppp))
## Warning: 2 points were rejected as lying outside the specified window
unitname(Mapcities.ppp) = c("meter", "meters")
Overlay a map of the city centers with the tornado report density.
cities = coords(Mapcities.ppp)
l1 = list("sp.points", cities, col = "black", pch = 16)
p = spplot(den.sgdf, "v", col.regions = matlab.like(20), sp.layout = list(l1),
colorkey = list(space = "bottom", labels = list(cex = 1)), sub = list(expression(paste("Tornado Reports/100 km",
{
}^2)), cex = 1, font = 1))
p
FIGURE 4
A city does not guarantee a concentration of tornado reports, but in regions without cities there are no significant report concentrations.
Create a map indicating the distance to the nearest city and then estimate the tornado density as a function of this distance. The method and bandwidth (bw) are selected based on trial and error.
Z = distmap(Mapcities.ppp)
m2km = 0.001
Z$v = Z$v * m2km
quantile(Z$v)
## 0% 25% 50% 75% 100%
## 0.07971 10.73306 16.91444 23.98462 73.77729
dist.sgdf = as(Z, "SpatialGridDataFrame")
p1 = spplot(dist.sgdf, "v", col.regions = rev(cm.colors(7)), at = seq(0, 70,
10), colorkey = list(space = "bottom", labels = list(cex = 1)), sub = list("Distance From Nearest City Center (km)",
cex = 1, font = 1))
p1
x = as.vector(Z$v)
p2 = histogram(x, xlab = "Distance From Nearest City Center (km)", aspect = "xy",
endpoints = c(0, 75), nint = 14, col = "light gray", cex = 1, font = 1)
p2 = update(p2, main = textGrob("b", x = unit(0.05, "npc")), par.settings = list(fontsize = list(text = 12)))
Distance from nearest city center model.
Z = distmap(Mapcities.ppp)
rhat2 = rhohat(unmark(Torn3.ppp), Z, bw = 0.25, method = "transform")
# plot(rhohat(unmark(Torn3.ppp), Z, bw=.5, method='transform'))
Create a data frame from the output of the rhohat function that can be used with ggplot.
m2km = 0.001
km2 = 1e+08 # 100 square km
dist2 = rhat2$Z * m2km
rho2 = rhat2$rho * km2
hi2 = rhat2$hi * km2
lo2 = rhat2$lo * km2
Rhowide.df = data.frame(dist2 = dist2, rho2 = rho2, hi2 = hi2, lo2 = lo2)
Plot the tornado density in number per 100 square km as a function of distance from city.
p1 = ggplot(Rhowide.df) + geom_ribbon(aes(x = dist2, ymin = lo2, ymax = hi2),
alpha = 0.3) + geom_line(aes(x = dist2, y = rho2), col = "black") + scale_y_continuous(limits = c(0.7,
1.5)) + scale_x_continuous(limits = c(0, 60)) + ylab(expression(paste("Tornado Reports/100 km",
{
}^2))) + xlab("Distance From Nearest City Center (km)") + theme_bw()
p1
## Warning: Removed 96 rows containing missing values (geom_path).
FIGURE 6 Tornado density shows a peak above 1.35 per 100 square kilometers at the city center with a decrease in density at larger radii. The difference between using the raw data and the declustered data is small relative to the population bias (not shown). The percentage by which the reports near cities and towns exceeds that of the countryside is
(rhat2$lo[1]/rhat2$hi[length(rhat2$rho)] - 1) * 100
## [1] 64.94
(rhat2$rho[1]/rhat2$rho[length(rhat2$rho)] - 1) * 100
## [1] 81.49
(rhat2$hi[1]/rhat2$lo[length(rhat2$rho)] - 1) * 100
## [1] 99.91
rhat2$lo[1]/rhat2$hi[length(rhat2$rho)]
## [1] 1.649
rhat2$rho[1]/rhat2$rho[length(rhat2$rho)]
## [1] 1.815
rhat2$hi[1]/rhat2$lo[length(rhat2$rho)]
## [1] 1.999
set.seed(550342)
Randomlocations.ppp = runifpoint(n = Mapcities.ppp$n, win = Mapcities.ppp$window)
Z2 = distmap(Randomlocations.ppp)
rhat3 = rhohat(unmark(Torn3.ppp), Z2, bw = 0.25, method = "transform")
dist3 = rhat3$Z * m2km
rho3 = rhat3$rho * km2
hi3 = rhat3$hi * km2
lo3 = rhat3$lo * km2
Rhowide2.df = data.frame(dist3 = dist3, rho3 = rho3, hi3 = hi3, lo3 = lo3)
p2 = ggplot(Rhowide2.df) + geom_ribbon(aes(x = dist3, ymin = lo3, ymax = hi3),
alpha = 0.3) + geom_line(aes(x = dist3, y = rho3), col = "black") + scale_y_continuous(limits = c(0.7,
1.5)) + scale_x_continuous(limits = c(0, 60)) + ylab(expression(paste("Tornado Reports/100 km",
{
}^2))) + xlab("Distance From Nearest Random Location (km)") + theme_bw()
p2
## Warning: Removed 11 rows containing missing values (geom_path).
FIGURE 7
Read the data and subset within our map tile.
Radar = read.table("RadarSites.txt", header = TRUE)
Here we get 8 radar sites.
Create a projected ppp object of the radar locations.
Radar.sdf = Radar
coordinates(Radar.sdf) = ~lon + lat
proj4string(Radar.sdf) = CRS(ll)
Radar.sdf = spTransform(Radar.sdf, CRS(proj4string(Torn3)))
Radar.ppp = as.ppp(coordinates(Radar.sdf), W = as.owin(Torn3.ppp))
unitname(Radar.ppp) = c("meter", "meters")
Z2 = distmap(Radar.ppp)
rhat3 = rhohat(unmark(Torn3.ppp), Z2, bw = 0.25, method = "transform")
dist3 = rhat3$Z * m2km
rho3 = rhat3$rho * km2
hi3 = rhat3$hi * km2
lo3 = rhat3$lo * km2
Rhowide2.df = data.frame(dist3 = dist3, rho3 = rho3, hi3 = hi3, lo3 = lo3)
p2 = ggplot(Rhowide2.df) + geom_ribbon(aes(x = dist3, ymin = lo3, ymax = hi3),
alpha = 0.3) + geom_line(aes(x = dist3, y = rho3), col = "red") + scale_y_continuous(limits = c(0.75,
1.42)) + ylab(expression(paste("(Tornado Reports/100 km", {
}^2, ")"))) + xlab("Distance From Nearest\n NWS WSR88D Radar Site (km)") +
theme_bw()
p2
rhat3$lo[1]/rhat3$hi[length(rhat3$rho)]
## [1] 1.191
rhat3$rho[1]/rhat3$rho[length(rhat3$rho)]
## [1] 1.309
rhat3$hi[1]/rhat3$lo[length(rhat3$rho)]
## [1] 1.439
The population bias noted above is likely most severe earlier in time. To test this we repeat the analysis using different time windows.
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1970
endYear = 2011
DT = 10
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3.sdf = Torn3
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
for (yr in seq(beginYear, endYear, 1)) {
Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
dist = c(dist, rhat$Z * m2km)
rho = c(rho, rhat$rho * km2)
rhoCity = c(rhoCity, rhat$rho[1] * km2)
rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
ratio = c(ratio, (rhat$rho[1]/rhat$rho[length(rhat$rho)] - 1) * 100)
hi = c(hi, rhat$hi * km2)
lo = c(lo, rhat$lo * km2)
ratiohi = c(ratiohi, (rhat$hi[1]/rhat$hi[length(rhat$hi)] - 1) * 100)
ratiolo = c(ratiolo, (rhat$lo[1]/rhat$lo[length(rhat$lo)] - 1) * 100)
Year = c(Year, rep(yr, length(rhat$Z)))
}
Rhowide2.df = data.frame(Year = factor(Year), dist = dist, rho = rho, hi = hi,
lo = lo)
Ratio.df = data.frame(Year = beginYear:endYear, ratio = ratio, ratioU = ratiohi,
ratioL = ratiolo, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi,
rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo)
xx = Year
yy = xx - 9
Years = paste(yy, "-", xx, sep = "")
Rhowide2.df$Years = Years
Plot the number density as a function of distance from the city center in 10-year intervals.
pp = ggplot(Rhowide2.df) + geom_line(aes(x = dist, y = rho)) + facet_wrap(~Years) +
geom_ribbon(aes(x = dist, ymin = lo, ymax = hi), alpha = 0.2) + ylab(expression(paste("Tornado Reports/100 km",
{
}^2, " per Decade"))) + xlab("Distance From Nearest City Center (km)") +
theme_bw()
pp
FIGURE 8
Time trends of tornado density in and out of town.
RatioLong.df = melt(Ratio.df, id.vars = "Year", measure.vars = c("rhoCity",
"rhoCountry"))
levels(RatioLong.df$variable)[levels(RatioLong.df$variable) == "rhoCity"] = "City"
levels(RatioLong.df$variable)[levels(RatioLong.df$variable) == "rhoCountry"] = "Country"
names(RatioLong.df)[names(RatioLong.df) == "variable"] = "Location"
ggplot(RatioLong.df, aes(x = Year, y = value, group = Location)) + geom_point(aes(color = Location)) +
geom_smooth() + ylab(expression(paste("Tornado Reports/100 km", {
}^2, " per Decade)"))) + scale_x_continuous(breaks = seq(1970, 2010, 10), labels = c("1961-1970",
"1971-1980", "1981-1990", "1991-2000", "2001-2010")) + xlab("10-year Interval") +
theme_bw() + theme(legend.justification = c(0, 1), legend.position = c(0,
1))
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.
FIGURE 9
1970 2011
F1+ 140.0 23.85 F0+ 149.2 12.00
Plot the percentage by which tornado reports near cities and towns exceed that in the countryside with uncertainty lines on the estimates.
pp = ggplot(Ratio.df, aes(x = Year, y = ratio)) + geom_point() + geom_smooth(span = 0.75,
color = "black")
pp = pp + geom_linerange(aes(ymax = Ratio.df$ratioU, ymin = Ratio.df$ratioL))
pp = pp + scale_x_continuous(breaks = seq(1970, 2010, 10), labels = c("1961-1970",
"1971-1980", "1981-1990", "1991-2000", "2001-2010"))
pp = pp + xlab("10-year Interval") + theme_bw()
pp + ylab("Percentage by which Tornado Reports near Cities\n Exceeds those in the Country")
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.
FIGURE 10
Repeat the analysis using subsets of the database, F0 only, F1 only, and F2+.
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3 = subset(Torn3, FSCALE == 0)
Torn3.sdf = Torn3
Torn3.ppp = as(Torn3.sdf["YEAR"], "ppp")
unitname(Torn3.ppp) = c("meter", "meters")
rhat2 = rhohat(unmark(Torn3.ppp), Z, bw = 0.25, method = "transform")
(rhat2$rho[1]/rhat2$rho[length(rhat2$rho)] - 1) * 100
## [1] 67.48
(rhat2$hi[1]/rhat2$lo[length(rhat2$rho)] - 1) * 100
## [1] 91.06
(rhat2$lo[1]/rhat2$hi[length(rhat2$rho)] - 1) * 100
## [1] 47.05
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1970
endYear = 2011
DT = 10
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE == 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
for (yr in seq(beginYear, endYear, 1)) {
Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
dist = c(dist, rhat$Z * m2km)
rho = c(rho, rhat$rho * km2)
rhoCity = c(rhoCity, rhat$rho[1] * km2)
rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
ratio = c(ratio, (rhat$rho[1]/rhat$rho[length(rhat$rho)] - 1) * 100)
hi = c(hi, rhat$hi * km2)
lo = c(lo, rhat$lo * km2)
ratiohi = c(ratiohi, (rhat$hi[1]/rhat$hi[length(rhat$hi)] - 1) * 100)
ratiolo = c(ratiolo, (rhat$lo[1]/rhat$lo[length(rhat$lo)] - 1) * 100)
Year = c(Year, rep(yr, length(rhat$Z)))
}
Rhowide2.df = data.frame(Year = factor(Year), dist = dist, rho = rho, hi = hi,
lo = lo)
xx = Year
yy = xx - 9
Years = paste(yy, "-", xx, sep = "")
Rhowide2.df$Years = Years
pp = ggplot(Rhowide2.df) + geom_line(aes(x = dist, y = rho)) + facet_wrap(~Years) +
geom_ribbon(aes(x = dist, ymin = lo, ymax = hi), alpha = 0.2) + ylab(expression(paste("Tornado Reports/100 km",
{
}^2, " per Decade"))) + xlab("Distance From Nearest City Center (km)") +
theme_bw()
pp
FIGURE 11
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3 = subset(Torn3, FSCALE == 1)
Torn3.sdf = Torn3
Torn3.ppp = as(Torn3.sdf["YEAR"], "ppp")
unitname(Torn3.ppp) = c("meter", "meters")
rhat2 = rhohat(unmark(Torn3.ppp), Z, bw = 0.25, method = "transform")
(rhat2$rho[1]/rhat2$rho[length(rhat2$rho)] - 1) * 100
## [1] 97.46
(rhat2$hi[1]/rhat2$lo[length(rhat2$rho)] - 1) * 100
## [1] 140.2
(rhat2$lo[1]/rhat2$hi[length(rhat2$rho)] - 1) * 100
## [1] 63.03
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1970
endYear = 2011
DT = 10
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE == 1, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
for (yr in seq(beginYear, endYear, 1)) {
Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
dist = c(dist, rhat$Z * m2km)
rho = c(rho, rhat$rho * km2)
rhoCity = c(rhoCity, rhat$rho[1] * km2)
rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
ratio = c(ratio, (rhat$rho[1]/rhat$rho[length(rhat$rho)] - 1) * 100)
hi = c(hi, rhat$hi * km2)
lo = c(lo, rhat$lo * km2)
ratiohi = c(ratiohi, (rhat$hi[1]/rhat$hi[length(rhat$hi)] - 1) * 100)
ratiolo = c(ratiolo, (rhat$lo[1]/rhat$lo[length(rhat$lo)] - 1) * 100)
Year = c(Year, rep(yr, length(rhat$Z)))
}
Rhowide2.df = data.frame(Year = factor(Year), dist = dist, rho = rho, hi = hi,
lo = lo)
xx = Year
yy = xx - 9
Years = paste(yy, "-", xx, sep = "")
Rhowide2.df$Years = Years
pp = ggplot(Rhowide2.df) + geom_line(aes(x = dist, y = rho)) + facet_wrap(~Years) +
geom_ribbon(aes(x = dist, ymin = lo, ymax = hi), alpha = 0.2) + ylab(expression(paste("Tornado Reports/100 km",
{
}^2, " per Decade"))) + xlab("Distance From Nearest City Center (km)") +
theme_bw()
pp
FIGURE 12
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3 = subset(Torn3, FSCALE > 1)
Torn3.sdf = Torn3
Torn3.ppp = as(Torn3.sdf["YEAR"], "ppp")
unitname(Torn3.ppp) = c("meter", "meters")
rhat2 = rhohat(unmark(Torn3.ppp), Z, bw = 0.25, method = "transform")
(rhat2$rho[1]/rhat2$rho[length(rhat2$rho)] - 1) * 100
## [1] 92.64
(rhat2$hi[1]/rhat2$lo[length(rhat2$rho)] - 1) * 100
## [1] 147.9
(rhat2$lo[1]/rhat2$hi[length(rhat2$rho)] - 1) * 100
## [1] 50.84
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
beginYear = 1970
endYear = 2011
DT = 10
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 1, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
for (yr in seq(beginYear, endYear, 1)) {
Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
dist = c(dist, rhat$Z * m2km)
rho = c(rho, rhat$rho * km2)
rhoCity = c(rhoCity, rhat$rho[1] * km2)
rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
ratio = c(ratio, (rhat$rho[1]/rhat$rho[length(rhat$rho)] - 1) * 100)
hi = c(hi, rhat$hi * km2)
lo = c(lo, rhat$lo * km2)
ratiohi = c(ratiohi, (rhat$hi[1]/rhat$hi[length(rhat$hi)] - 1) * 100)
ratiolo = c(ratiolo, (rhat$lo[1]/rhat$lo[length(rhat$lo)] - 1) * 100)
Year = c(Year, rep(yr, length(rhat$Z)))
}
Rhowide2.df = data.frame(Year = factor(Year), dist = dist, rho = rho, hi = hi,
lo = lo)
xx = Year
yy = xx - 9
Years = paste(yy, "-", xx, sep = "")
Rhowide2.df$Years = Years
pp = ggplot(Rhowide2.df) + geom_line(aes(x = dist, y = rho)) + facet_wrap(~Years) +
geom_ribbon(aes(x = dist, ymin = lo, ymax = hi), alpha = 0.2) + ylab(expression(paste("Tornado Reports/100 km",
{
}^2, " per Decade"))) + xlab("Distance From Nearest City Center (km)") +
theme_bw()
pp
FIGURE 13
A plausible explanation for this decline in population bias is the increasing surviellance of tornadoes by storm spotters and chasers. Storm chasing has increased in popularity and the number of chasers that roam tornado alley is on the rise.
Files containing number of newspaper articles that mention the phrase 'storm chaser' (SC) and 'tornado chaser' on newspaperarchive.com since 1980. The file is PaperChase.csv.
NPmentions = read.csv("PaperChase.csv", header = TRUE)
Gmentions = read.csv("GoogleChase.csv", header = TRUE)
Moving average function.
ma = function(x, n = 10) {
filter(x, rep(1/n, n), sides = 1)
}
Storm chase in newspapers per year.
GC = as.vector(ma(Gmentions$Chaser))
GC = GC[10:31]
GC.df = data.frame(Year = 1989:2010, GC = GC)
TC = as.vector(ma(NPmentions$TC[-length(NPmentions$TC)]))
TC.df = data.frame(Year = 1980:2010, TC = TC)
Plot the number of Google pages mentioning storm chasers.
ggplot(GC.df, aes(x = Year, y = GC)) + geom_point() + geom_smooth(span = 0.5) +
scale_y_reverse() + ylab("Ten-year Avg Google Pages\n axis reversed") +
xlab("Ten-year interval") + theme_bw() + scale_x_continuous(breaks = seq(1990,
2010, 5), labels = c("1981-1990", "1986-1995", "1991-2000", "1996-2005",
"2001-2010"))
Merged = merge(Ratio.df, GC.df, "Year")
Merged3 = subset(Merged, Year >= 1996)
summary(lm(rhoCountry ~ Year + GC, data = Merged3))
##
## Call:
## lm(formula = rhoCountry ~ Year + GC, data = Merged3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.00895 -0.00403 -0.00112 0.00197 0.01483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.516210 2.263651 -0.23 0.82
## Year 0.000320 0.001133 0.28 0.78
## GC 0.006435 0.000925 6.96 1.5e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.00737 on 12 degrees of freedom
## Multiple R-squared: 0.966, Adjusted R-squared: 0.961
## F-statistic: 172 on 2 and 12 DF, p-value: 1.45e-09
summary(lm(rhoCity ~ Year + GC, data = Merged3))
##
## Call:
## lm(formula = rhoCity ~ Year + GC, data = Merged3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.02496 -0.01283 -0.00379 0.01243 0.03764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.67076 5.73059 3.08 0.0095 **
## Year -0.00871 0.00287 -3.04 0.0103 *
## GC 0.00764 0.00234 3.26 0.0068 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0187 on 12 degrees of freedom
## Multiple R-squared: 0.47, Adjusted R-squared: 0.382
## F-statistic: 5.32 on 2 and 12 DF, p-value: 0.0221
Long3 = melt(Merged3, id = c("GC"), measure = c("rhoCity", "rhoCountry"))
levels(Long3$variable)[levels(Long3$variable) == "rhoCity"] = "City"
levels(Long3$variable)[levels(Long3$variable) == "rhoCountry"] = "Country"
Long4 = melt(Merged3, id = c("GC"), measure = c("rhoCitylo", "rhoCountrylo"))
Long5 = melt(Merged3, id = c("GC"), measure = c("rhoCityhi", "rhoCountryhi"))
Final = data.frame(Long3, lo = Long4[, 3], hi = Long5[, 3])
pp = ggplot(Final, aes(x = GC, y = value)) + geom_point() + geom_smooth(color = "black") +
facet_wrap(~variable) + geom_linerange(aes(ymax = hi, ymin = lo)) + theme_bw() +
xlab(expression(paste("Google News", {
}^TM, " Mentions of 'Storm Chaser'"))) + ylab(expression(paste("Tornado Reports/100 km",
{
}^2)))
Create a two panel plot.
pp
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using
## loess. Use 'method = x' to change the smoothing method.
FIGURE 14
pp = ggplot(Merged3, aes(x = GC, y = rhoCountry)) + geom_point() + geom_smooth()
pp = pp + geom_linerange(aes(ymax = rhoCountryhi, ymin = rhoCountrylo))
pp = pp + xlab(expression(paste("Google News", {
}^TM, " Mentions of 'Storm Chaser'")))
p1 = pp + ylab(expression(paste("County Reports/100 km", {
}^2))) + theme_bw()
pp = ggplot(Merged3, aes(x = GC, y = rhoCity)) + geom_point() + geom_smooth()
pp = pp + geom_linerange(aes(ymax = rhoCityhi, ymin = rhoCitylo))
pp = pp + xlab(expression(paste("Google News", {
}^TM, " Mentions of 'Storm Chaser'")))
p2 = pp + ylab(expression(paste("City Reports/100 km", {
}^2))) + theme_bw()
RussellKS = geocode("Russell KS")
allpop = 1000
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
nt = numeric()
beginYear = 1990
endYear = 2011
DT = 1
Map = get_map(location = unlist(RussellKS), maptype = "roadmap", zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, SLAT <= ula & SLAT >= lla & SLON <= ulo & SLON >= llo)
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3.sdf = Torn3
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
Mapcities = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >= llo &
POP_1990 > allpop)
dim(Mapcities)[1]
## [1] 315
Mapcities.sdf = Mapcities
coordinates(Mapcities.sdf) = ~LON + LAT
proj4string(Mapcities.sdf) = CRS(ll)
Mapcities.sdf = spTransform(Mapcities.sdf, CRS(proj4string(Torn3)))
Mapcities.ppp = as.ppp(coordinates(Mapcities.sdf), W = as.owin(TornA.ppp))
unitname(Mapcities.ppp) = c("meter", "meters")
Z = distmap(Mapcities.ppp)
for (yr in seq(beginYear, endYear, 1)) {
Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
nt = c(nt, Torn4.ppp$n)
rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
dist = c(dist, rhat$Z * m2km)
rho = c(rho, rhat$rho * km2)
rhoCity = c(rhoCity, rhat$rho[1] * km2)
rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
ratio = c(ratio, rhat$rho[1]/rhat$rho[length(rhat$rho)])
hi = c(hi, rhat$hi * km2)
lo = c(lo, rhat$lo * km2)
ratiohi = c(ratiohi, rhat$hi[1]/rhat$hi[length(rhat$hi)])
ratiolo = c(ratiolo, rhat$lo[1]/rhat$lo[length(rhat$lo)])
Year = c(Year, rep(yr, length(rhat$Z)))
}
Region = rep("Central Plains", length(beginYear:endYear))
R1.df = data.frame(Year = beginYear:endYear, nt = nt, ratio = ratio, ratioU = ratiohi,
ratioL = ratiolo, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi,
rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo,
Region = Region)
# ggplot(R1.df, aes(Year, ratio)) + geom_point() +
# geom_smooth(method='lm') + theme_bw()
Annual percentage (factor) by which city density exceeds country density in the South and Midwest.
ArmoryMS = geocode("Armory MS")
allpop = 1000
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
nt = numeric()
beginYear = 1990
endYear = 2011
DT = 1
Map = get_map(location = unlist(ArmoryMS), maptype = "roadmap", zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, SLAT <= ula & SLAT >= lla & SLON <= ulo & SLON >= llo)
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3.sdf = Torn3
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
Mapcities = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >= llo &
POP_1990 > allpop)
dim(Mapcities)[1]
## [1] 964
Mapcities.sdf = Mapcities
coordinates(Mapcities.sdf) = ~LON + LAT
proj4string(Mapcities.sdf) = CRS(ll)
Mapcities.sdf = spTransform(Mapcities.sdf, CRS(proj4string(Torn3)))
Mapcities.ppp = as.ppp(coordinates(Mapcities.sdf), W = as.owin(TornA.ppp))
## Warning: 4 points were rejected as lying outside the specified window
## Warning: data contain duplicated points
unitname(Mapcities.ppp) = c("meter", "meters")
Z = distmap(Mapcities.ppp)
for (yr in seq(beginYear, endYear, 1)) {
Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
nt = c(nt, Torn4.ppp$n)
rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
dist = c(dist, rhat$Z * m2km)
rho = c(rho, rhat$rho * km2)
rhoCity = c(rhoCity, rhat$rho[1] * km2)
rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
ratio = c(ratio, rhat$rho[1]/rhat$rho[length(rhat$rho)])
hi = c(hi, rhat$hi * km2)
lo = c(lo, rhat$lo * km2)
ratiohi = c(ratiohi, rhat$hi[1]/rhat$hi[length(rhat$hi)])
ratiolo = c(ratiolo, rhat$lo[1]/rhat$lo[length(rhat$lo)])
Year = c(Year, rep(yr, length(rhat$Z)))
}
Region = rep("South", length(beginYear:endYear))
R2.df = data.frame(Year = beginYear:endYear, nt = nt, ratio = ratio, ratioU = ratiohi,
ratioL = ratiolo, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi,
rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo,
Region = Region)
# ggplot(R2.df[R2.df$nt>=10,], aes(Year, ratio)) + geom_point() +
# geom_smooth(method='lm') + theme_bw()
TerreHauteIN = geocode("Terre Haute IN")
allpop = 1000
m2km = 0.001
km2 = 1e+08
dist = numeric()
rho = numeric()
hi = numeric()
lo = numeric()
Year = numeric()
ratio = numeric()
ratiohi = numeric()
ratiolo = numeric()
rhoCity = numeric()
rhoCountry = numeric()
rhoCountryhi = numeric()
rhoCountrylo = numeric()
rhoCityhi = numeric()
rhoCitylo = numeric()
nt = numeric()
beginYear = 1990
endYear = 2011
DT = 1
Map = get_map(location = unlist(TerreHauteIN), maptype = "roadmap", zoom = 7)
lla = attr(Map, "bb")$ll.lat
llo = attr(Map, "bb")$ll.lon
ula = attr(Map, "bb")$ur.lat
ulo = attr(Map, "bb")$ur.lon
Torn2 = subset(Torn, SLAT <= ula & SLAT >= lla & SLON <= ulo & SLON >= llo)
Torn3 = subset(Torn2, MONTH == 5 | MONTH == 6)
Torn3.sdf = Torn3
TornA.sdf = Torn3.sdf[Torn3.sdf$FSCALE >= 0, ]
TornA.ppp = as(TornA.sdf["YEAR"], "ppp")
Mapcities = subset(UScities, LAT <= ula & LAT >= lla & LON <= ulo & LON >= llo &
POP_1990 > allpop)
dim(Mapcities)[1]
## [1] 2053
Mapcities.sdf = Mapcities
coordinates(Mapcities.sdf) = ~LON + LAT
proj4string(Mapcities.sdf) = CRS(ll)
Mapcities.sdf = spTransform(Mapcities.sdf, CRS(proj4string(Torn3)))
Mapcities.ppp = as.ppp(coordinates(Mapcities.sdf), W = as.owin(TornA.ppp))
## Warning: 48 points were rejected as lying outside the specified window
unitname(Mapcities.ppp) = c("meter", "meters")
Z = distmap(Mapcities.ppp)
for (yr in seq(beginYear, endYear, 1)) {
Torn4.ppp = TornA.ppp[TornA.ppp$marks > (yr - DT) & TornA.ppp$marks <= yr]
nt = c(nt, Torn4.ppp$n)
rhat = rhohat(unmark(Torn4.ppp), Z, bw = 0.25, method = "transform")
dist = c(dist, rhat$Z * m2km)
rho = c(rho, rhat$rho * km2)
rhoCity = c(rhoCity, rhat$rho[1] * km2)
rhoCityhi = c(rhoCityhi, rhat$hi[1] * km2)
rhoCitylo = c(rhoCitylo, rhat$lo[1] * km2)
rhoCountry = c(rhoCountry, rhat$rho[length(rhat$rho)] * km2)
rhoCountryhi = c(rhoCountryhi, rhat$hi[length(rhat$rho)] * km2)
rhoCountrylo = c(rhoCountrylo, rhat$lo[length(rhat$rho)] * km2)
ratio = c(ratio, rhat$rho[1]/rhat$rho[length(rhat$rho)])
hi = c(hi, rhat$hi * km2)
lo = c(lo, rhat$lo * km2)
ratiohi = c(ratiohi, rhat$hi[1]/rhat$hi[length(rhat$hi)])
ratiolo = c(ratiolo, rhat$lo[1]/rhat$lo[length(rhat$lo)])
Year = c(Year, rep(yr, length(rhat$Z)))
}
Region = rep("Midwest", length(beginYear:endYear))
R3.df = data.frame(Year = beginYear:endYear, nt = nt, ratio = ratio, ratioU = ratiohi,
ratioL = ratiolo, rhoCity = rhoCity, rhoCountry = rhoCountry, rhoCountryhi = rhoCountryhi,
rhoCountrylo = rhoCountrylo, rhoCityhi = rhoCityhi, rhoCitylo = rhoCitylo,
Region = Region)
ggplot(R3.df[R3.df$nt >= 20, ], aes(Year, ratio)) + geom_point() + geom_smooth(method = "lm") +
theme_bw()
Combine files
Final2 = rbind(R1.df, R2.df, R3.df)
ggplot(Final2[Final2$nt >= 20, ], aes(Year, ratio)) + geom_point() + geom_smooth(method = "lm") +
theme_bw() + facet_grid(. ~ Region) + ylab("Factor By Which Tornado Report Density In Cities\n Exceeds That in Rural Areas")