County-level analysis for individual states. The framework uses spatial polygons to organize the data.
Decide on a spatial domain.
FP = c(55, 19, 17, 27, 38, 46, 31, 20, 29, 40, 18, 26,
        5,  8, 35, 48, 22, 28,  1, 47, 21, 39, 13, 56)
AB = c("WI", "IA", "IL", "MN", "ND", "SD", "NE", "KS", "MO", "OK", "IN", "MI",
       "AR", "CO", "NM", "TX", "LA", "MS", "AL", "TN", "KY", "OH", "GA", "WY")
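As a quick sanity check (a sketch, assuming the maps package is installed), the FIPS/abbreviation pairs can be compared against the state.fips lookup table shipped with maps:
data("state.fips", package = "maps")
all(state.fips$abb[match(FP, state.fips$fips)] == AB) #should be TRUE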
The model is built from the SPC (Storm Prediction Center) tornado dataset. The tornado tracks are stored as a spatial lines data frame. Transform the native coordinates to a Mercator projection.
if(!file.exists("torn")){
download.file(url = "http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
destfile = "tornado.zip")
unzip("tornado.zip")
}
library("rgdal")
## Loading required package: sp
## rgdal: version: 1.0-4, (SVN revision 548)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.2, released 2015/02/10
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
## Linking to sp version: 1.1-1
TornL = readOGR(dsn = "torn", layer = "torn",
stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: "torn", layer: "torn"
## with 60114 features
## It has 22 fields
TornL = spTransform(TornL, CRS("+init=epsg:3857")) #Mercator projection
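An optional check (a sketch) that the transformation took: the proj4 string of the tracks should now start with +proj=merc.
proj4string(TornL)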
County boundaries are from https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html. The 20m (1:20,000,000) files are the low-resolution boundaries; here we use the 5m (1:5,000,000) resolution.
if(!file.exists("cb_2014_us_county_5m.shp")){
download.file("http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_2014_us_county_5m.zip",
"cb_2014_us_county_5m.zip", mode = "wb")
unzip("cb_2014_us_county_5m.zip")
}
US.sp = readOGR(dsn = ".", layer = "cb_2014_us_county_5m",
stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "cb_2014_us_county_5m"
## with 3233 features
## It has 9 fields
## Warning in readOGR(dsn = ".", layer = "cb_2014_us_county_5m",
## stringsAsFactors = FALSE): Z-dimension discarded
The warning message about the Z-dimension can be safely ignored.
Extract the states using the state Federal Information Processing Standard (FIPS) codes. Print the number of counties and the area of the domain in square kilometers; the areas can be checked against Wolfram|Alpha.
library("rgeos")
## rgeos version: 0.3-11, (SVN revision 479)
## GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921
## Linking to sp version: 1.1-0
## Polygon checking: TRUE
ST.sp = US.sp[as.integer(US.sp$STATEFP) %in% FP, ]
nrow(ST.sp) #number of counties
## [1] 2168
county = paste(ST.sp$STATEFP, ST.sp$COUNTYFP, sep = "")
countiesun.sp = geometry(spChFIDs(ST.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
gArea(counties.sp) / 10^6 #total area of the spatial domain in square km
## [1] 7593453
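To check individual state areas against Wolfram|Alpha, a sketch that sums the county areas by the two-digit state FIPS prefix of each polygon ID:
a = gArea(counties.sp, byid = TRUE) / 10^6 #county areas in square km
sort(tapply(a, substring(names(a), 1, 2), sum)) #state areas by FIPS code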
Use the county population estimates from the Census U.S. Intercensal County Population Data, 1970-2014 (http://www.nber.org/data/census-intercensal-county-population.html). The 2012 value is carried forward for 2013-2015.
library("reshape2")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:rgeos':
##
## intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
L = "http://myweb.fsu.edu/jelsner/data/county_population.csv"
pop = read.csv(L) %>%
filter(state_fips %in% FP, county_fips != 0) %>%
mutate(pop2013 = pop2012,
pop2014 = pop2012,
pop2015 = pop2012,
county = as.character(fips)) %>%
select(county, pop1970:pop2012, pop2013, pop2014, pop2015)
Pop.df = melt(pop, id.vars = "county") %>%
mutate(yr = as.numeric(substring(variable, first = 4, last = 7))) %>%
rename(pop = value) %>%
mutate(lpop = log10(pop)) %>%
select(county, pop, yr, lpop)
Pop.df$ID = match(Pop.df$county, as.integer(county)) #generate spatial ID
Pop.df = Pop.df[order(Pop.df$ID), ] #order by spatial ID
The county population data are available through 2014, but the raw file requires some manual manipulation.
PC.df = Pop.df %>%
group_by(ID) %>%
summarize(Change = (pop[yr == max(yr)] - pop[yr == min(yr)])/pop[yr == max(yr)] * 100) %>%
data.frame()
row.names(PC.df) = county
spdf = SpatialPolygonsDataFrame(counties.sp, PC.df)
spdf$Name = ST.sp$NAME
spdf$area = rgeos::gArea(counties.sp, byid = TRUE)
The latest population values by county are needed for the correlation and the choropleth map. The log of the population density is the common (base-10) log.
poplatest = subset(Pop.df, yr == max(yr))
spdf$pop = poplatest$pop
spdf$Lpop = log10(spdf$pop)
spdf$density = spdf$pop/spdf$area * 10^6
spdf$Ldensity = log10(spdf$density)
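A unit check (a sketch): spdf$area here is in square meters (from gArea), so pop/area * 10^6 gives persons per square kilometer.
summary(spdf$density)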
Add a buffer to each track based on the width given in the attribute table, creating a spatial polygons data frame of tornado paths. Overlay the paths on the domain; the overlay returns a vector with either NA (path not inside the domain) or the county number. Remove duplicate paths. Start with 1970 for a 46-year (1970-2015) climatology. capStyle is set to "FLAT" so buffering does not lengthen the track.
syr = 1970
eyr = 2015
Width = TornL$wid * .9144 #convert path width from yards to meters
sum(Width[Width == 0])
## [1] 0
TornP = gBuffer(TornL, byid = TRUE, width = Width/2, capStyle = "FLAT")
tc = over(TornP, counties.sp)
TornP2 = subset(TornP, !is.na(tc))
TornP2 = subset(TornP2, yr >= syr & yr <= eyr)
df = data.frame(SLAT = TornP2$slat,
SLON = TornP2$slon,
ELAT = TornP2$elat,
ELON = TornP2$elon,
DATE = TornP2$date)
dup = duplicated(df)
sum(dup)
## [1] 405
sum(dup)/dim(TornP2@data)[1] * 100
## [1] 1.027397
TornP3 = subset(TornP2, !dup)
dim(TornP3@data)
## [1] 39015 22
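To see why capStyle matters, a small sketch (the names ex, flat, and rnd are illustrative only) compares a FLAT buffer with the default ROUND buffer on a single track; round caps extend the path beyond its endpoints.
ex = TornL[1, ]
flat = gBuffer(ex, width = 100, capStyle = "FLAT")
rnd = gBuffer(ex, width = 100) #default capStyle = "ROUND"
gArea(rnd) > gArea(flat) #TRUE: round caps add area beyond the track ends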
All EF0+ tornadoes since 1970. Overlay the tornado paths on the counties; each element of the resulting list is the subset of tornado tracks for one county. The data frame nTor.df contains the per-county tornado count, area, and rate.
df = as.data.frame(TornP3) %>%
group_by(yr) %>%
summarize(nT = n())
library("ggplot2")
library("ggrepel")
ggplot(df, aes(x = yr, y = nT)) +
geom_point() +
geom_smooth() +
# geom_text(aes(label = nT), vjust = -.5, size = 4, color = "darkblue") +
geom_text_repel(aes(label = nT), color = "darkblue") +
xlab("Year") + ylab("Annual Number of EF0+ Tornadoes") +
scale_y_continuous(limits = c(0, 1600)) +
# scale_y_continuous(limits = c(0, 500)) +
theme(axis.text.x = element_text(size = 7),
legend.position = "none")
Figure: Time series of annual EF0+ tornado counts over the domain.
The PathsAll.df data frame contains the tornado path attributes listed by county. Information is repeated for each county in cases where the path intersects more than one county. Padding is needed for counties without tornadoes.
TornP3 = subset(TornP3, mag >= 0) #EF0+
ct = sp::over(counties.sp, TornP3, returnList = TRUE)
area = gArea(counties.sp, byid = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
nTd = sapply(ct, function(x) length(unique(x$date)))
Nyears = diff(range(TornP3$yr)) + 1
nTor.df = data.frame(county = county,
Nyears = Nyears,
nT = nT,
area = area/1000000,
nTd = nTd,
rate = nT/area * 10^6 * 100 * 100 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county)
)
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$nTd = nTor.df$nTd
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
PathsAll.df = plyr::ldply(ct, data.frame)
colnames(PathsAll.df)[1] = "county"
none = nTor.df$county[nTor.df$nT == 0] # for counties without tornadoes
if(length(none) > 0){
tmp = PathsAll.df[1:length(none), ]
tmp[!is.na(tmp)] = NA
tmp$county = none
PathsAll.df = rbind(PathsAll.df, tmp)
}
length(none)/dim(pop)[1] * 100
## [1] 0.8763838
Area is now given in square kilometers. The rate is the annual number of tornadoes per 100 km by 100 km square region. Next, choropleth maps of tornado counts and tornado days. Less than 1% of all counties have not experienced a tornado.
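As a sketch of the rate's units: with the area column in square kilometers, the rate should equal nT divided by (area times Nyears), scaled by 10^4.
all.equal(nTor.df$rate, with(nTor.df, nT / (area * Nyears) * 10^4)) #should be TRUE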
library("maps")
##
## # ATTENTION: maps v3.0 has an updated 'world' map. #
## # Many country borders and names have changed since 1990. #
## # Type '?world' or 'news(package="maps")'. See README_v3. #
library("maptools")
## Checking rgeos availability: TRUE
states = map('state', fill = TRUE,
region = c("wisconsin", "iowa", "illinois", "minnesota",
"north dakota", "south dakota", "nebraska",
"kansas", "missouri", "oklahoma", "arkansas",
"michigan", "colorado", "new mexico", "texas",
"indiana", "louisiana", "mississippi", "alabama",
"tennessee", "kentucky", "georgia", "ohio", "wyoming"),
plot = FALSE)
IDs = sapply(strsplit(states$names, ":"), function(x) x[1])
ll = "+proj=longlat +datum=WGS84"
states.sl = map2SpatialLines(states, IDs = IDs, proj4string = CRS(ll))
states.sl = spTransform(states.sl, CRS(proj4string(TornL)))
library("wesanderson")
range(spdf$nT)
## [1] 0 219
rng = seq(0, 240, 40)
crq = wes_palette(6, name = "Zissou", type = "continuous")
spplot(spdf, "nT", col = "transparent", at = rng,
sp.layout = list("sp.lines", states.sl),
col.regions = crq,
colorkey = list(space = "bottom", labels = paste(rng)),
par.settings = list(axis.line = list(col = NA)),
sub = "Number of EF0+ Tornadoes (1970-2015)")
df = as.data.frame(spdf) %>%
arrange(desc(rate))
df[1:10, ]
## ID Change Name area pop Lpop density Ldensity
## 1 1324 43.48717 Galveston 1484.2562 300484 5.477821 202.44754 2.306312
## 2 712 50.82998 Lafayette 940.5175 227055 5.356131 241.41497 2.382764
## 3 1996 NA Broomfield 146.9212 58298 4.765654 396.79784 2.598569
## 4 861 59.04946 Harris 6131.8439 4253700 6.628767 693.70651 2.841176
## 5 1751 15.83376 Acadia 2288.5315 61912 4.791775 27.05316 1.432218
## 6 1654 18.85442 Denver 683.4865 634265 5.802271 927.98468 2.967541
## 7 24 70.17160 Johnson 2678.8557 153441 5.185941 57.27856 1.757992
## 8 1998 59.57576 Adams 5213.0624 459598 5.662378 88.16277 1.945285
## 9 385 28.85811 Oklahoma 2819.1453 741781 5.870276 263.12266 2.420158
## 10 933 69.19153 Cleveland 2165.7340 265638 5.424290 122.65495 2.088685
## county Nyears nT nTd rate
## 1 48167 46 74 66 10.838396
## 2 22055 46 33 28 7.627623
## 3 08014 46 5 4 7.398230
## 4 48201 46 192 124 6.806946
## 5 22001 46 71 49 6.744405
## 6 08031 46 21 20 6.679309
## 7 48251 46 75 42 6.086310
## 8 08001 46 141 102 5.879879
## 9 40109 46 75 47 5.783437
## 10 40027 46 57 42 5.721526
library("RColorBrewer")
range(spdf$nTd)
## [1] 0 137
rng = seq(0, 140, 20)
crq = brewer.pal(7, "GnBu")
spplot(spdf, "nTd", col = "transparent", at = rng,
sp.layout = list("sp.lines", states.sl),
col.regions = crq,
colorkey = list(space = "bottom", labels = paste(rng)),
par.settings = list(axis.line = list(col = NA)),
sub = "Number of EF0+ Tornado Days (1970-2015)")
range(log2(spdf$pop))
## [1] 6.149747 22.318752
rng = seq(6, 24, 3)
cr = brewer.pal(6, "Blues")
labs = as.character(round(2^rng))
spdf$pop2 = log2(spdf$pop)
spplot(spdf, "pop2", col = "transparent", at = rng,
sp.layout = list("sp.lines", states.sl),
col.regions = cr,
colorkey = list(space = "bottom", labels = labs),
par.settings = list(axis.line = list(col = NA)),
sub = "Population (2012)")
The data frame nTor.year gives the number of tornadoes affecting the region by year. Filter out rows where the year is NA; these are the padding rows for counties without tornadoes.
nTor.year = PathsAll.df %>%
group_by(yr) %>%
summarize(nT = n()) %>%
filter(!is.na(yr)) %>%
data.frame()
nTor.year$yr2 = nTor.year$yr
sum(nTor.year$nT)
## [1] 44789
The data frame nTor.day0 contains the tornado count and tornado-day count by year and county.
nTor.day0 = PathsAll.df %>%
group_by(county, yr) %>%
summarize(nT = length(county),
nTd = length(unique(date)))
nTor.day0$county = as.factor(nTor.day0$county)
nTor.day0$yr = as.factor(nTor.day0$yr)
nTor.day0 = left_join(expand.grid(county = levels(as.factor(PathsAll.df$county)),
yr = levels(as.factor(PathsAll.df$yr))),
nTor.day0)
## Joining by: c("county", "yr")
nTor.day0[is.na(nTor.day0)] = 0
nTor.day0$county = as.character(nTor.day0$county)
nTor.day0$yr = as.integer(as.character(nTor.day0$yr))
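A consistency sketch: after the join and zero-fill, nTor.day0 should form a complete county-by-year grid.
nrow(nTor.day0) == length(unique(nTor.day0$county)) * length(unique(nTor.day0$yr)) #should be TRUE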
nTor.day = base::merge(nTor.df[c("county", "area", "ID")],
nTor.day0) %>%
arrange(ID, yr)
Merge tornado and population data.
nTor.day$county = as.integer(nTor.day$county)
popNtor = base::merge(nTor.day, Pop.df, all = FALSE) %>%
mutate(density = pop/area,
Ldensity = log2(density),
yr2 = yr) %>% #needed for the INLA models
arrange(ID, yr)
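Another consistency sketch: the merge should leave one row per county per year (2,168 counties times 46 years gives 99,728 rows here).
nrow(popNtor) == length(unique(popNtor$ID)) * length(unique(popNtor$yr)) #should be TRUE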
Some global controls for INLA. Use as needed.
#source("http://www.math.ntnu.no/inla/givemeINLA.R")
library("INLA")
## Loading required package: Matrix
## Loading required package: splines
control = list(
predictor = list(compute = TRUE, link = 1),
inla = list(strategy = "laplace",
fast = FALSE,
stencil = 7,
npoints = 198,
int.strategy = "grid",
dz = .5),
results = list(return.marginals.random = TRUE),
compute = list(config = TRUE, mlik = TRUE, cpo = TRUE, dic = TRUE, po = TRUE),
family = list(variant = 1, hyper = list(theta = list(prior = "loggamma", param = c(1, 1)))))
Spatial neighborhood definition as an INLA graph.
library("spdep")
nb = poly2nb(spdf)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
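A sketch for inspecting the neighborhood structure: card() counts each county's neighbors, and the graph's node count should match the number of counties.
summary(spdep::card(nb)) #neighbors per county
tornb.inla$n == length(counties.sp) #should be TRUE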
The area multiplied by Nyears gives the square-kilometer years exposed to tornadoes (E). Scale the exposure so its mean is close to one. The model includes a spatial random effect indexed by county ID; constr = TRUE imposes a sum-to-zero constraint on the spatial effects. The DIC measures how well the model fits the data; the larger it is, the worse the fit. Year is centered on 1991.
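A sketch of how the exposure enters: the negative binomial mean for a county-year is E times exp(linear predictor), so dividing the area (square km) by roughly its mean value (~3500) centers E near one.
summary(popNtor$area / 3500) #exposure values, centered near 1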
To use the model for prediction, pad the data set with one extra year and set the number of tornadoes to NA for that year. Use the latest population as the population estimate. This gives the expected observed frequencies for any year, not the expected total frequencies, which could be had only by estimating a larger population.
popNtor$ID2 = popNtor$ID
tmp = subset(popNtor, yr == 1991)
tmp$Ldensity = max(popNtor$Ldensity)
tmp$yr = 2016
tmp$yr2 = tmp$yr
tmp$nT = NA
popNtor2 = rbind(tmp, popNtor)
Model with a spatial term. Determine the model that results in the lowest DIC. The fit takes about 40 minutes on a Mac Pro.
mean(popNtor2$area)
## [1] 3502.515
formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity +
Ldensity:I(yr - 1991) + I(yr - 1991)
model = inla(formula = formula, family = "nbinomial", E = area/3500,
data = popNtor2,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
df = model$summary.fitted.values[1:nrow(tmp), ] * 100 * 100 / 3500 #per sq 100 km; rows 1:nrow(tmp) are the padded prediction-year rows (tmp was bound first)
#df = model$summary.fitted.values[1:nrow(tmp), ] * 100 * 100 / 2000 / .386102 #per sq 100 miles
row.names(df) = county
The predicted values give the annual expected rate for each county: the number of tornadoes per 100 km by 100 km square region per year.
Model diagnostics. The predictive quality of the model is assessed by the cross-validated log score. A smaller value of the score indicates better prediction quality (Gneiting and Raftery 2007).
cpopit = data.frame(model$cpo,model$summary.fitted.values)[!is.na(model$cpo$pit), ]
modPIT = cpopit$pit - runif(nrow(cpopit)) * cpopit$cpo
goftest::ad.test(modPIT)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modPIT
## An = 2.8838, p-value = 0.03136
-mean(log(cpopit$cpo)) #equivalent to a predictive mean square error (lower is better).
## [1] 0.8314674
cor.test(popNtor$nT, cpopit$mean, conf.level = .95)
##
## Pearson's product-moment correlation
##
## data: popNtor$nT and cpopit$mean
## t = 78.994, df = 99726, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2368168 0.2484987
## sample estimates:
## cor
## 0.2426665
cor(popNtor$nT, cpopit$mean, method = "s")
## [1] 0.2223245
#plot(popNtor$nT + runif(n = length(popNtor$nT)), cpopit$mean)
Brier score
brier.score <- function(x, m){
  # expands E[(x - F)^2] using the posterior mean and sd of the fitted values
  with(m, {mean(x^2) - 2 * mean(x * mean) + mean(mean^2 + sd^2)})
}
brier.score(popNtor[["nT"]], cpopit) # (lower is better)
## [1] 0.9884705
library("ggplot2")
ggplot() +
geom_histogram(aes(x = modPIT), color = "white",
fill = "grey", binwidth = .05) +
xlab("Probability") +
ylab("Frequency")
# ggtitle("Distribution of modified PIT") #looks good: the model fits the data well; over/under-fitting and bias are not a problem
spdfR = spCbind(spdf, df)
cor(spdfR$rate, spdfR$mean)
## [1] 0.7940903
ggplot(as.data.frame(spdfR), aes(x = rate, y = mean)) +
geom_point() +
geom_abline(slope = 1) +
scale_x_continuous(limits = c(0, 11), breaks = seq(0, 11, 2)) +
scale_y_continuous(limits = c(0, 11), breaks = seq(0, 11, 2)) +
geom_smooth(method = lm, se = FALSE, color = "red") +
xlab("Raw Rate\n [Tornadoes/100 km square region]") +
ylab("Expected Rate\n [Tornadoes/100 km square region]")
range(log2(spdfR$mean))
## [1] -4.500615 2.108635
rng = seq(-5, 3, 1)
crq = brewer.pal(8, "Oranges")
as.character(round(2^rng, 2))
## [1] "0.03" "0.06" "0.12" "0.25" "0.5" "1" "2" "4" "8"
labs = c("0.03", "0.06", "0.12", "0.25", "0.5", "1", "2", "4", "8")
centroids = SpatialPointsDataFrame(gCentroid(spdfR, byid = TRUE), data.frame(spdfR))
cxy = coordinates(centroids)
spdfR$mean2 = log2(spdfR$mean)
spplot(spdfR, "mean2", col = "transparent", at = rng,
sp.layout = list("sp.lines", states.sl),
col.regions = crq,
colorkey = list(space = "bottom", labels = labs),
par.settings = list(axis.line = list(col = NA)),
sub = expression("Expected Annual Tornado (EF0+) Occurrence Rate [per 100 km square region]"))
range(spdfR$sd)
## [1] 0.01457663 0.92964066
rngs = seq(0, 1, .2)
crqs = brewer.pal(5, "Greens")
spplot(spdfR, "sd", col = "transparent", at = rngs,
sp.layout = list("sp.lines", states.sl),
col.regions = crqs,
colorkey = list(space = "bottom", labels = paste(rngs)),
par.settings = list(axis.line = list(col = NA)),
sub = expression("Standard Error"))
Write out the shapefiles.
writeOGR(spdfR, dsn = "All", layer = "Rates", overwrite_layer = TRUE,
driver = "ESRI Shapefile")
Repeat for EF1+ tornadoes.
TornP3 = subset(TornP3, mag >= 1) #EF1+
ct = sp::over(counties.sp, TornP3, returnList = TRUE)
area = gArea(counties.sp, byid = TRUE)
names(ct) = county
nT = sapply(ct, function(x) nrow(x))
nTd = sapply(ct, function(x) length(unique(x$date)))
Nyears = diff(range(TornP3$yr)) + 1
nTor.df = data.frame(county = county,
Nyears = Nyears,
nT = nT,
area = area/1000000,
nTd = nTd,
rate = nT/area * 10^6 * 100 * 100 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county)
)
spdf$county = county
spdf$Nyears = nTor.df$Nyears
spdf$nT = nTor.df$nT
spdf$nTd = nTor.df$nTd
spdf$area = nTor.df$area
spdf$rate = nTor.df$rate
PathsAll.df = plyr::ldply(ct, data.frame)
colnames(PathsAll.df)[1] = "county"
none = nTor.df$county[nTor.df$nT == 0] # for counties without tornadoes
if(length(none) > 0){
tmp = PathsAll.df[1:length(none), ]
tmp[!is.na(tmp)] = NA
tmp$county = none
PathsAll.df = rbind(PathsAll.df, tmp)
}
length(none)/dim(pop)[1] * 100
## [1] 2.306273
nTor.year = PathsAll.df %>%
group_by(yr) %>%
summarize(nT = n()) %>%
filter(!is.na(yr)) %>%
data.frame()
nTor.year$yr2 = nTor.year$yr
sum(nTor.year$nT)
## [1] 24266
nTor.day0 = PathsAll.df %>%
group_by(county, yr) %>%
summarize(nT = length(county),
nTd = length(unique(date)))
nTor.day0$county = as.factor(nTor.day0$county)
nTor.day0$yr = as.factor(nTor.day0$yr)
nTor.day0 = left_join(expand.grid(county = levels(as.factor(PathsAll.df$county)),
yr = levels(as.factor(PathsAll.df$yr))),
nTor.day0)
## Joining by: c("county", "yr")
nTor.day0[is.na(nTor.day0)] = 0
nTor.day0$county = as.character(nTor.day0$county)
nTor.day0$yr = as.integer(as.character(nTor.day0$yr))
nTor.day = base::merge(nTor.df[c("county", "area", "ID")],
nTor.day0) %>%
arrange(ID, yr)
nTor.day$county = as.integer(nTor.day$county)
popNtor = base::merge(nTor.day, Pop.df, all = FALSE) %>%
mutate(density = pop/area,
Ldensity = log2(density),
yr2 = yr) %>% #needed for the INLA models
arrange(ID, yr)
popNtor$ID2 = popNtor$ID
tmp = subset(popNtor, yr == 1991)
tmp$Ldensity = max(popNtor$Ldensity)
tmp$yr = 2016
tmp$yr2 = tmp$yr
tmp$nT = NA
popNtor2 = rbind(tmp, popNtor)
mean(popNtor2$area)
## [1] 3502.515
formula = nT ~ f(ID, model = "besag", graph = tornb.inla, constr = TRUE) +
Ldensity +
I(yr - 1991)
model = inla(formula = formula, family = "nbinomial", E = area/3500,
data = popNtor2,
quantiles = c(.05, .5, .95),
control.compute = control$compute,
control.predictor = control$predictor,
control.results = control$results,
control.family = control$family
)
df = model$summary.fitted.values[1:nrow(tmp), ] * 100 * 100 / 3500 #per sq 100 km
#df = model$summary.fitted.values[1:nrow(tmp), ] * 100 * 100 / 2000 / .386102 #per sq 100 miles
row.names(df) = county
cpopit = data.frame(model$cpo,model$summary.fitted.values)[!is.na(model$cpo$pit), ]
modPIT = cpopit$pit - runif(nrow(cpopit)) * cpopit$cpo
goftest::ad.test(modPIT)
##
## Anderson-Darling test of goodness-of-fit
## Null hypothesis: uniform distribution
##
## data: modPIT
## An = 2.3749, p-value = 0.05766
-mean(log(cpopit$cpo)) #equivalent to a predictive mean square error (lower is better).
## [1] 0.5774859
cor.test(popNtor$nT, cpopit$mean, conf.level = .95)
##
## Pearson's product-moment correlation
##
## data: popNtor$nT and cpopit$mean
## t = 64.778, df = 99726, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1949795 0.2068911
## sample estimates:
## cor
## 0.2009427
brier.score(popNtor[["nT"]], cpopit) # (lower is better)
## [1] 0.431243
spdfR = spCbind(spdf, df)
ggplot(as.data.frame(spdfR), aes(x = rate, y = mean)) +
geom_point() +
geom_abline(slope = 1) +
scale_x_continuous(limits = c(0, 5), breaks = seq(0, 5, 1)) +
scale_y_continuous(limits = c(0, 5), breaks = seq(0, 5, 1)) +
geom_smooth(method = lm, se = FALSE, color = "red") +
xlab("Raw Rate\n [Tornadoes/100 km square region]") +
ylab("Expected Rate\n [Tornadoes/100 km square region]")
cor(spdfR$rate, spdfR$mean)
## [1] 0.8438553
range(log2(spdfR$mean))
## [1] -6.2270547 0.8163864
rng = seq(-7, 1, 1)
crq = brewer.pal(8, "Oranges")
as.character(round(2^rng, 3))
## [1] "0.008" "0.016" "0.031" "0.062" "0.125" "0.25" "0.5" "1" "2"
labs = as.character(round(2^rng, 3))
spdfR$mean2 = log2(spdfR$mean)
spplot(spdfR, "mean2", col = "transparent", at = rng,
sp.layout = list("sp.lines", states.sl),
col.regions = crq,
colorkey = list(space = "bottom", labels = labs),
par.settings = list(axis.line = list(col = NA)),
sub = expression("Expected Annual Tornado (EF1+) Occurrence Rate [per 100 km square region]"))
Write out the shapefiles.
writeOGR(spdfR, dsn = "All1", layer = "Rates", overwrite_layer = TRUE,
driver = "ESRI Shapefile")