County-level analysis by individual states. The framework uses spatial polygons for organizing the data.
Decide on a spatial domain.
FP1 = 55; AB1 = "WI"
FP2 = 19; AB2 = "IA"
FP3 = 17; AB3 = "IL"
FP4 = 27; AB4 = "MN"
FP5 = 38; AB5 = "ND"
FP6 = 46; AB6 = "SD"
FP7 = 31; AB7 = "NE"
FP8 = 20; AB8 = "KS"
FP9 = 29; AB9 = "MO"
FP10 = 40; AB10 = "OK"
FP11 = 18; AB11 = "IN"
FP12 = 26; AB12 = "MI"
FP13 = 5; AB13 = "AR"
FP14 = 8; AB14 = "CO"
FP15 = 35; AB15 = "NM"
FP16 = 48; AB16 = "TX"
FP17 = 22; AB17 = "LA"
FP18 = 28; AB18 = "MS"
FP19 = 1; AB19 = "AL"
FP20 = 47; AB20 = "TN"
FP21 = 21; AB21 = "KY"
FP22 = 39; AB22 = "OH"
FP23 = 13; AB23 = "GA"
FP24 = 56; AB24 = "WY"
FP = c(FP1, FP2, FP3, FP4, FP5, FP6, FP7, FP8, FP9, FP10, FP11,
FP12, FP13, FP14, FP15, FP16, FP17, FP18, FP19, FP20, FP21, FP22, FP23, FP24)
AB = c(AB1, AB2, AB3, AB4, AB5, AB6, AB7, AB8, AB9, AB10, AB11,
AB12, AB13, AB14, AB15, AB16, AB17, AB18, AB19, AB20, AB21, AB22, AB23, AB24)
The model is built using data from the SPC tornado dataset. Data are stored as a spatial lines data frame. Transform the native coordinates to a Mercator projection.
#setwd("~/Dropbox/Tyler")
if(!file.exists("torn")){
download.file(url = "http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
destfile = "tornado.zip")
unzip("tornado.zip")
}
library("rgdal")
## Loading required package: sp
## rgdal: version: 1.0-4, (SVN revision 548)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.2, released 2015/02/10
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
## Linking to sp version: 1.1-1
TornL = readOGR(dsn = "torn", layer = "torn",
stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: "torn", layer: "torn"
## with 58959 features
## It has 21 fields
TornL = spTransform(TornL, CRS("+init=epsg:3857")) #Mercator projection
Boundaries are from https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html. The 20m = 1:20,000,000 low resolution boundaries. Here we use the 5m = 1:5,000,000.
if(!file.exists("cb_2014_us_county_5m.shp")){
download.file("http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_2014_us_county_5m.zip",
"cb_2014_us_county_5m.zip", mode = "wb")
unzip("cb_2014_us_county_5m.zip")
}
US.sp = readOGR(dsn = ".", layer = "cb_2014_us_county_5m",
stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "cb_2014_us_county_5m"
## with 3233 features
## It has 9 fields
## Warning in readOGR(dsn = ".", layer = "cb_2014_us_county_5m",
## stringsAsFactors = FALSE): Z-dimension discarded
The warning message about the Z-dimension can be safely ignored.
Extract the state using the state Federal Information Processing Standard (FIPS). Print out the number of counties and the area of state in square km. Check with Wolfram alpha.
library("rgeos")
## rgeos version: 0.3-11, (SVN revision 479)
## GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921
## Linking to sp version: 1.1-0
## Polygon checking: TRUE
ST.sp = US.sp[as.integer(US.sp$STATEFP) %in% FP, ]
nrow(ST.sp) #number of counties
## [1] 2168
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
gArea(counties.sp) / 10^6 #total area of the spatial domain in square km
## [1] 7593453
Use the population estimates from the Census Bureau from 1970-2012 see http://www.nber.org/data/census-intercensal-county-population.html. Use the 2012 value for 2013 & 2014. Census U.S. Intercensal County Population Data, 1970-2014 from http://www.nber.org/data/census-intercensal-county-population.html.
library("reshape2")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:rgeos':
##
## intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
L = "http://myweb.fsu.edu/jelsner/data/county_population.csv"
pop = read.csv(L) %>%
filter(state_fips %in% FP, county_fips != 0) %>%
mutate(pop2013 = pop2012,
pop2014 = pop2012,
county = as.character(fips)) %>%
select(county, pop1970:pop2012, pop2013, pop2014)
Pop.df = melt(pop, id.vars = "county") %>%
mutate(yr = as.numeric(substring(variable, first = 4, last = 7))) %>%
rename(pop = value) %>%
mutate(lpop = log10(pop)) %>%
select(county, pop, yr, lpop)
Pop.df$ID = match(Pop.df$county, as.integer(county)) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ] #order by spatial ID
The county population data are available through 2014 but the raw file requires some manual manipulation.
PC.df = Pop.df %>%
group_by(ID) %>%
summarize(Change = (pop[yr == max(yr)] - pop[yr == min(yr)])/pop[yr == max(yr)] * 100) %>%
data.frame()
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
spdf$area = rgeos::gArea(counties.sp, byid = TRUE)
Latest population values by county are needed for correlation and choropleth. The log of the population density is the common log.
poplatest = subset(Pop.df, yr == max(yr))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)
Add a buffer to the tracks based on the width specified in the attribute table to create a spatial polygons data frame of tornado paths. Overlay paths onto the extent and return a vector indicating either NA (is not inside extent) or the county number. Remove duplicate paths. Start with 1970 for a 45-year climatology. capStyle is set to “FLAT” so the track is not made longer.
syr = 1970
eyr = 2014
Width = TornL$wid * .9144
sum(Width[Width == 0])
## [1] 0
TornP = gBuffer(TornL, byid = TRUE, width = Width/2, capStyle = "FLAT")
tc = over(TornP, counties.sp)
TornP2 = subset(TornP, !is.na(tc))
TornP2 = subset(TornP2, yr >= syr, yr <= eyr)
df = data.frame(SLAT = TornP2$slat,
SLON = TornP2$slon,
ELAT = TornP2$elat,
ELON = TornP2$elon,
DATE = TornP2$date)
dup = duplicated(df)
sum(dup)
## [1] 405
sum(dup)/dim(TornP2@data)[1] * 100
## [1] 1.055897
TornP3 = subset(TornP2, !dup)
dim(TornP3@data)
## [1] 37951 21
All EF0+ tornadoes since 1970. Overlay tornado paths on counties. Each element of the list is a county subset of the tornado track file. The data frame nTor.df contains the per-county tornado count, area, and rate.
df = as.data.frame(TornP3) %>%
group_by(yr) %>%
summarize(nT = n())
library("ggplot2")
library("ggrepel")
ggplot(df, aes(x = yr, y = nT)) +
geom_point() +
geom_smooth() +
# geom_text(aes(label = nT), vjust = -.5, size = 4, color = "darkblue") +
geom_text_repel(aes(label = nT), color = "darkblue") +
xlab("Year") + ylab("Annual Number of EF0+ Tornadoes") +
scale_y_continuous(limits = c(0, 1600)) +
# scale_y_continuous(limits = c(0, 500)) +
theme(axis.text.x = element_text(size = 7),
legend.position = "none")
Figure: Time series tornado counts over the domain
The PathsAll.df data frame contains the tornado path attributes listed by county. Information is repeated for each county in cases where the path intersects more than one county. Padding is needed for counties without tornadoes.
TornP3 = subset(TornP3, mag >= 0) #EF0+
ct = over(counties.sp, TornP3, returnList = TRUE)
area = gArea(counties.sp, byid = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
nTd = sapply(ct, function(x) length(unique(x$date)))
Nyears = diff(range(TornP3$yr)) + 1
nTor.df = data.frame(county = county,
Nyears = Nyears,
nT = nT,
area = area/1000000,
nTd = nTd,
rate = nT/area * 10^6 * 100 * 100 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county)
)
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$nTd = nTor.df$nTd
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
PathsAll.df = plyr::ldply(ct, data.frame)
colnames(PathsAll.df)[1] = "county"
none = nTor.df$county[nTor.df$nT == 0] # for counties without tornadoes
if(length(none) > 0){
tmp = PathsAll.df[1:length(none), ]
tmp[!is.na(tmp)] = NA
tmp$county = none
PathsAll.df = rbind(PathsAll.df, tmp)
}
Area is now given in square km. Rate is annual number of tornadoes per sq 100 km. Choropleth map of tornado counts & tornado days.
library("maps")
##
## # ATTENTION: maps v3.0 has an updated 'world' map. #
## # Many country borders and names have changed since 1990. #
## # Type '?world' or 'news(package="maps")'. See README_v3. #
library("maptools")
## Checking rgeos availability: TRUE
##
## Attaching package: 'maptools'
## The following object is masked from 'package:sp':
##
## nowrapSpatialLines
states = map('state', fill = TRUE,
region = c("wisconsin", "iowa", "illinois", "minnesota",
"north dakota", "south dakota", "nebraska",
"kansas", "missouri", "oklahoma", "arkansas",
"michigan", "colorado", "new mexico", "texas",
"indiana", "louisiana", "mississippi", "alabama",
"tennessee", "kentucky", "georgia", "ohio", "wyoming"),
plot = FALSE)
IDs = sapply(strsplit(states$names, ":"), function(x) x[1])
ll = "+proj=longlat +datum=WGS84"
states.sl = map2SpatialLines(states, IDs = IDs, proj4string = CRS(ll))
states.sl = spTransform(states.sl, CRS(proj4string(TornL)))
library("wesanderson")
range(spdf$nT)
## [1] 0 217
rng = seq(0, 240, 40)
crq = wes_palette(6, name = "Zissou", type = "continuous")
spplot(spdf, "nT", col = "transparent", at = rng,
sp.layout = list("sp.lines", states.sl),
col.regions = crq,
colorkey = list(space = "bottom", labels = paste(rng)),
par.settings = list(axis.line = list(col = NA)),
sub = "Number of EF0+ Tornadoes (1970-2014)")
library("RColorBrewer")
range(spdf$nTd)
## [1] 0 135
rng = seq(0, 140, 20)
crq = brewer.pal(7, "GnBu")
spplot(spdf, "nTd", col = "transparent", at = rng,
sp.layout = list("sp.lines", states.sl),
col.regions = crq,
colorkey = list(space = "bottom", labels = paste(rng)),
par.settings = list(axis.line = list(col = NA)),
sub = "Number of EF0+ Tornado Days (1970-2014)")
range(log2(spdf$pop))
## [1] 6.149747 22.318752
rng = seq(6, 24, 3)
cr = brewer.pal(6, "Blues")
labs = as.character(round(2^rng))
spdf$pop2 = log2(spdf$pop)
spplot(spdf, "pop2", col = "transparent", at = rng,
sp.layout = list("sp.lines", states.sl),
col.regions = cr,
colorkey = list(space = "bottom", labels = labs),
par.settings = list(axis.line = list(col = NA)),
sub = "Population (2012)")
The data frame nTor.year is the number of tornadoes affecting the region by year. Need to filter by != NA to remove the row containing the count of counties without tornadoes as the year is set to NA.
nTor.year = PathsAll.df %>%
group_by(yr) %>%
summarize(nT = n()) %>%
filter(yr != "NA") %>%
data.frame()
nTor.year$yr2 = nTor.year$yr
sum(nTor.year$nT)
## [1] 43579
The data frame nTor.day0 contains the tornado county and tornado day county by year and county.
nTor.day0 = PathsAll.df %>%
group_by(county, yr) %>%
summarize(nT = length(county),
nTd = length(unique(date)))
nTor.day0$county = as.factor(nTor.day0$county)
nTor.day0$yr = as.factor(nTor.day0$yr)
nTor.day0 = left_join(expand.grid(county = levels(as.factor(PathsAll.df$county)),
yr = levels(as.factor(PathsAll.df$yr))),
nTor.day0)
## Joining by: c("county", "yr")
nTor.day0[is.na(nTor.day0)] = 0
nTor.day0$county = as.character(nTor.day0$county)
nTor.day0$yr = as.integer(as.character(nTor.day0$yr))
nTor.day = base::merge(nTor.df[c("county", "area", "ID")],
nTor.day0) %>%
arrange(ID, yr)
Merge tornado and population data.
nTor.day$county = as.integer(nTor.day$county)
popNtor = base::merge(nTor.day, Pop.df, all = FALSE) %>%
mutate(density = pop/area,
Ldensity = log2(density),
yr2 = yr) %>% #needed for the INLA models
arrange(ID, yr)