suppressMessages(library(httr))
suppressMessages(library(moments))
suppressMessages(library(raster))
suppressMessages(library(spdep))
suppressMessages(library(splines))
useDrop <- function(x, use.fun = source, ...){
tf <- tempfile()
tmp <- GET(x)
stop_for_status(tmp)
writeBin(content(tmp, type = "raw"), tf)
use.fun(tf, ...)
}
useDrop("https://dl.dropboxusercontent.com/s/bw5hvvj9e732e8i/CountyStatisticsSupport.R", source)
## [1] "Sourcing CountyStatisticsSupport.R on Wed Sep 17 09:27:10 2014"
## [1] "loading packages"
## rgeos version: 0.3-4, (SVN revision 438)
## GEOS runtime version: 3.3.3-CAPI-1.7.4
## Polygon checking: TRUE
##
## rgdal: version: 0.8-16, (SVN revision 498)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/proj
## Checking rgeos availability: TRUE
download.file("http://www.spc.noaa.gov/gis/svrgis/zipped/tornado.zip",
"tornado.zip", mode="wb")
#setwd("~/Dropbox/Tornadoes/CountyStatistics")
unzip("tornado.zip")
TornL = changeTornDataTypes(readOGR(dsn = ".", layer = "tornado",
stringsAsFactors = FALSE))
## OGR data source with driver: ESRI Shapefile
## Source: ".", layer: "tornado"
## with 57988 features and 21 fields
## Feature type: wkbLineString with 2 dimensions
useDrop("http://data.biogeo.ucdavis.edu/data/gadm2/R/USA_adm2.RData",
load, envir = sys.frame())
KS.sp = gadm[gadm$NAME_1 == "Kansas", ]
county = tolower(KS.sp$NAME_2)
countiesun.sp = geometry(spChFIDs(KS.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
TornL2 = subset(TornL, Year >= 1954 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)
nT = sapply(ct, function(x) nrow(x))
area = rgeos::gArea(counties.sp, byid = TRUE)
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = "KS", county = county, Nyears = Nyears,
nT = nT, area = area/1000000, rate = nT/area * 10^6 * 10000 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county))
TracksAll.df = ldply(ct, data.frame)
TracksAll.df$county = rep(county, sapply(ct, nrow))
nTor.df.day = ddply(TracksAll.df, .(county, Year), summarize,
nT = length(county),
nTd = length(unique(Date)), .drop = FALSE)
spdf = SpatialPolygonsDataFrame(counties.sp, nTor.df)
suppressMessages(library(wesanderson))
crq = wes.palette(name = "Zissou", type = "continuous")
range(spdf$nT)
## [1] 2 39
rng = seq(0, 40, 5)
spplot(spdf, "nT", col = "white", at = rng,
col.regions = crq(8),
colorkey = list(space = "bottom", labels = paste(rng)),
sub = "Number of EF1+ Tornadoes (1954-2013)")
plotTorn <- function(x, tracks){
CRS0 = CRS("+proj=longlat +datum=WGS84")
#x is the spatial polygons object
#tracks is the track file for each polygon.
bc = getBordersAndCenters(x, CRS0)
bc$centers$nT = x$nT
bc$centers$rate = round(x$rate, 2)
borders.df = merge(bc$centers[c("county", "nT", "rate")],
bc$borders, by = "county")
#Use this data to make the plot of intensity
mapT = ggplot(borders.df, aes(x = long, y = lat, group = county, fill = nT)) +
geom_polygon(colour = "gray", size = .2) +
coord_map(project="polyconic") +
theme(panel.grid.minor=element_blank(),
panel.grid.major=element_blank(),
panel.background=element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position="none") +
# labs(title="Number of EF1+ Tornadoes\n(1954-2013)") +
xlab("") + ylab("") +
geom_text(data=bc$centers, aes(x = long, y = lat, group = NULL, label = nT),
vjust = .5, size = 5, color = "white")
#generate the tracks data from the track.df file
tracks = tracks[!duplicated(tracks$OM), ]
tracks = subset(tracks, ELAT != 0)
mapT +
geom_segment(aes(x = SLON, xend = ELON, y = SLAT, yend = ELAT,
group = NULL, fill = NULL),
data = tracks, lineend = "round",
color = "black", size = 1.5, alpha = .6) +
geom_text(data=bc$centers, aes(x = long, y = lat, group = NULL, label = nT),
vjust = .5, size = 5, color="white") +
theme(plot.title = element_text(vjust = -2,
hjust = .3,
size = 20,
face = "bold"))
}
print(plotTorn(x = spdf, tracks = TracksAll.df) +
scale_fill_gradientn(colours = crq(10)))
cor.test(spdf$nT, spdf$area)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$area
## t = 2.856, df = 103, p-value = 0.005184
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.0836 0.4398
## sample estimates:
## cor
## 0.2709
pop = useDrop("https://www.dropbox.com/s/o8s6423r5k290xs/PEP_2012_PEPANNRES.csv",
read.csv, stringsAsFactors = FALSE)
#pop = read.csv("PEP_2013_PEPANNRES.csv", stringsAsFactors = FALSE)
spdf$pop = processCity(pop, field = "respop72012")
spdf$Lpop = log10(spdf$pop)
sort(spdf$pop)
## [1] 1298 1517 1704 1913 1963 2175 2181 2256 2496 2538
## [11] 2560 2578 2639 2678 2720 2729 2757 2784 2871 2979
## [21] 2986 3046 3068 3169 3174 3220 3278 3571 3765 3806
## [31] 3968 4256 4358 4396 4858 4861 4937 5223 5519 5612
## [41] 5756 5758 5854 5911 6030 6072 6113 6355 6454 6494
## [51] 6928 6946 7039 7863 7864 7917 7923 7941 8502 8531
## [61] 9105 9397 9441 9728 9881 9985 10022 10132 12347 13319
## [71] 13449 14897 16142 16406 16813 18945 19762 21226 21284 22302
## [81] 23547 23674 25906 27557 29053 29356 32612 33748 34459 34752
## [91] 34852 36288 37200 38013 39361 55988 64438 65827 75508 77739
## [101] 112864 159129 178991 503889 559913
spdf$county[order(spdf$pop)]
## [1] "greeley" "wallace" "lane" "comanche"
## [5] "hodgeman" "stanton" "clark" "wichita"
## [9] "kiowa" "sheridan" "rawlins" "graham"
## [13] "hamilton" "cheyenne" "elk" "gove"
## [17] "chase" "logan" "decatur" "edwards"
## [21] "trego" "jewell" "ness" "morton"
## [25] "lincoln" "rush" "woodson" "chautauqua"
## [29] "smith" "osborne" "kearny" "haskell"
## [33] "stafford" "meade" "republic" "barber"
## [37] "scott" "rooks" "phillips" "norton"
## [41] "stevens" "washington" "morris" "harper"
## [45] "gray" "ottawa" "sherman" "mitchell"
## [49] "greenwood" "ellsworth" "pawnee" "russell"
## [53] "wabaunsee" "kingman" "doniphan" "anderson"
## [57] "grant" "thomas" "coffey" "clay"
## [61] "wilson" "cloud" "linn" "pratt"
## [65] "brown" "rice" "marshall" "nemaha"
## [69] "marion" "allen" "jackson" "bourbon"
## [73] "osage" "neosho" "atchison" "jefferson"
## [77] "dickinson" "cherokee" "labette" "pottawatomie"
## [81] "seward" "sumner" "franklin" "barton"
## [85] "ellis" "mcpherson" "miami" "lyon"
## [89] "montgomery" "ford" "harvey" "cowley"
## [93] "finney" "geary" "crawford" "saline"
## [97] "reno" "butler" "riley" "leavenworth"
## [101] "douglas" "wyandotte" "shawnee" "sedgwick"
## [105] "johnson"
cor.test(spdf$nT, spdf$pop)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$pop
## t = 1.867, df = 103, p-value = 0.06467
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01107 0.36015
## sample estimates:
## cor
## 0.181
suppressMessages(library(RColorBrewer))
range(spdf$Lpop)
## [1] 3.113 5.748
rng = seq(3, 6, .5)
crq = brewer.pal(7, "Blues")
spdf$density = spdf$pop/spdf$area
spdf$Ldensity = log10(spdf$density)
range(spdf$Ldensity)
## [1] -0.1881 2.6598
rng = seq(-.2, 2.8, .6)
crq = brewer.pal(6, "Blues")
labs = as.character(round(10^rng, 0))
spplot(spdf, "Ldensity", col = "white", at = rng,
col.regions = crq,
colorkey = list(space = "bottom", labels = labs),
sub = "Population Density (2013) [persons per square km]")
cor.test(spdf$nT, spdf$area)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$area
## t = 2.856, df = 103, p-value = 0.005184
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.0836 0.4398
## sample estimates:
## cor
## 0.2709
#source("http://www.math.ntnu.no/inla/givemeINLA.R")
nb = poly2nb(spdf)
wts = nb2listw(nb)
suppressMessages(library(INLA))
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
spdf.inla.formula = nT ~ Ldensity +
f(ID, model = "besag", graph = tornb.inla)
spdf.inla = inla(formula = spdf.inla.formula, family = "nbinomial", E = area,
data = spdf@data, control.predictor = list(compute = TRUE),
control.results = list(return.marginals.random = TRUE),
control.compute = list(config = TRUE, mlik = TRUE, cpo = TRUE,
dic = TRUE, po = TRUE))
summary(spdf.inla)
##
## Call:
## c("inla(formula = spdf.inla.formula, family = \"nbinomial\", data = spdf@data, ", " E = area, control.compute = list(config = TRUE, mlik = TRUE, ", " cpo = TRUE, dic = TRUE, po = TRUE), control.predictor = list(compute = TRUE), ", " control.results = list(return.marginals.random = TRUE))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.1138 0.1722 0.0843 0.3703
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -4.9262 0.0781 -5.0801 -4.9261 -4.773 -4.926 0
## Ldensity 0.3524 0.0953 0.1653 0.3522 0.540 0.352 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 9.329 2.146
## Precision for ID 17.992 17.994
## 0.025quant 0.5quant
## size for the nbinomial observations (overdispersion) 5.692 9.146
## Precision for ID 4.198 12.519
## 0.975quant mode
## size for the nbinomial observations (overdispersion) 14.079 8.819
## Precision for ID 64.588 7.443
##
## Expected number of effective parameters(std dev): 18.35(7.739)
## Number of equivalent replicates : 5.723
##
## Deviance Information Criterion: 707.38
## Effective number of parameters: 20.09
##
## Marginal Likelihood: -451.62
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
spdf$random = spdf.inla$summary.random$ID$mean
spdf$fitted = spdf.inla$summary.fitted.values$mean
print(ggplotSpdata(spdf, "random", tfun = function(x) round((exp(x) - 1) * 100)) +
# ggtitle("Posterior Mean Random Effect (%)") +
theme(plot.title = element_text(vjust = 0, hjust = .5, size = 16, face = "bold")))
spdf$nTadj = mean(spdf$nT) + (mean(spdf$nT) * (exp(spdf$random) - 1))
rng = seq(12, 27, 3)
crq = brewer.pal(6, "Reds")
spplot(spdf, "nTadj", col = "white", at = rng,
col.regions = crq,
colorkey = list(space = "bottom", labels = as.character(rng)),
sub = "Adjusted Tornado Occurrence")
spdf$county[(exp(spdf$random) - 1) * 100 > 25]
## [1] "barton" "brown" "edwards" "pawnee" "stafford"
TornL2.df = as.data.frame(TornL2)
TornL2.df = subset(TornL2.df, ST == "KS")
sum(TornL2.df$Month >= 4 & TornL2.df$Month <= 6)/dim(TornL2.df)[1] * 100
## [1] 72.8
IL.sp = gadm[gadm$NAME_1 == "Illinois", ]
county = tolower(IL.sp$NAME_2)
county = county[-50] #Remove Lake Michigan from Ill
IL.sp = IL.sp[-50, ] #Remove Lake Michigan from Ill
countiesun.sp = geometry(spChFIDs(IL.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
TornL2 = subset(TornL, Year >= 1954 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)
nT = sapply(ct, function(x) nrow(x))
area = rgeos::gArea(counties.sp, byid = TRUE)
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = "IL", county = county, Nyears = Nyears,
nT = nT, area = area/1000000, rate = nT/area * 10^6 * 10000 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county))
TracksAll.df = ldply(ct, data.frame)
TracksAll.df$county = rep(county, sapply(ct, nrow))
nTor.df.day = ddply(TracksAll.df, .(county, Year), summarize,
nT = length(county),
nTd = length(unique(Date)), .drop = FALSE)
spdfIL = SpatialPolygonsDataFrame(counties.sp, nTor.df)
# set lat_0 and lon_0 based on Wolfram Alpha center coordinates Illinois
p4sIL = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40.07 +lon_0=-89.2 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdfIL = spTransform(spdfIL, CRS(p4sIL))
range(spdfIL$nT)
## [1] 1 48
crq = wes.palette(name = "Zissou", type = "continuous")
rng = seq(0, 50, 5)
spplot(spdfIL, "nT", col = "white", at = rng,
col.regions = crq(10),
colorkey = list(space = "bottom", labels = paste(rng)),
sub = "Number of EF1+ Tornadoes (1954-2013)")
print(plotTorn(x = spdfIL, tracks = TracksAll.df) +
scale_fill_gradientn(colours = crq(10)))
cor.test(spdfIL$nT, spdfIL$area)
##
## Pearson's product-moment correlation
##
## data: spdfIL$nT and spdfIL$area
## t = 9.68, df = 100, p-value = 4.441e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5794 0.7839
## sample estimates:
## cor
## 0.6955
pop = read.csv("PEP_2013_PEPANNRES_IL.csv", stringsAsFactors = FALSE)
pop[82, 3] = "Saint Clair County, Illinois"
pop[19, 3] = "De Kalb County, Illinois"
pop[50, 3] = "La Salle County, Illinois"
#spdfIL$pop = processCity(pop, field = "respop72012")
ctys = sapply(strsplit(pop$GEO.display.label, ","), function(x) x[1])
ctys2 = tolower(sapply(strsplit(ctys, " "), function(x) x[1]))
pop = pop[order(ctys2), ]
all(tolower(substring(pop$GEO.display.label, 1, nchar(spdfIL$county))) == spdfIL$county)
## [1] TRUE
spdfIL$pop = pop$respop72012
spdfIL$Lpop = log10(spdfIL$pop)
sort(spdfIL$pop)
## [1] 4256 4271 5021 5282 5439 5876 5948 5960
## [9] 6724 6908 7062 7490 7752 8386 9656 10977
## [17] 11746 12315 12704 12786 13421 13622 13746 13997
## [25] 14372 14584 14640 14946 15047 15174 16214 16223
## [33] 16276 16298 16484 16511 16592 16622 17612 17649
## [41] 17808 18160 18900 19605 19883 22025 22078 22205
## [49] 22552 22738 25007 29257 29688 30027 32572 32956
## [57] 33310 34314 34335 34597 35132 35309 36673 38083
## [65] 38525 38677 38868 38965 39482 46943 47205 50177
## [73] 50208 52254 52850 53659 53859 57832 60113 66714
## [81] 67243 80715 104622 109978 112847 112944 118108 136122
## [89] 147514 172295 187255 199269 203435 267899 268714 291844
## [97] 307729 521306 681590 701219 927418 5227992
spdfIL$county[order(spdfIL$pop)]
## [1] "hardin" "pope" "calhoun" "scott" "gallatin"
## [6] "putnam" "stark" "pulaski" "edwards" "brown"
## [11] "henderson" "schuyler" "alexander" "hamilton" "jasper"
## [16] "cumberland" "wabash" "marshall" "menard" "johnson"
## [21] "cass" "greene" "clay" "ford" "mason"
## [26] "white" "washington" "moultrie" "carroll" "massac"
## [31] "richland" "mercer" "pike" "clark" "de witt"
## [36] "piatt" "lawrence" "wayne" "bond" "union"
## [41] "warren" "edgar" "hancock" "crawford" "douglas"
## [46] "fayette" "perry" "shelby" "jo daviess" "jersey"
## [51] "saline" "iroquois" "montgomery" "logan" "mcdonough"
## [56] "randolph" "monroe" "bureau" "effingham" "christian"
## [61] "lee" "morgan" "fulton" "clinton" "livingston"
## [66] "jefferson" "marion" "woodford" "franklin" "stephenson"
## [71] "macoupin" "henry" "grundy" "knox" "ogle"
## [76] "coles" "boone" "whiteside" "jackson" "williamson"
## [81] "adams" "vermilion" "de kalb" "macon" "kankakee"
## [86] "la salle" "kendall" "tazewell" "rock island" "mclean"
## [91] "peoria" "sangamon" "champaign" "madison" "saint clair"
## [96] "winnebago" "mchenry" "kane" "will" "lake"
## [101] "dupage" "cook"
cor.test(spdfIL$nT, spdfIL$pop)
##
## Pearson's product-moment correlation
##
## data: spdfIL$nT and spdfIL$pop
## t = 3.385, df = 100, p-value = 0.001019
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1345 0.4849
## sample estimates:
## cor
## 0.3206
range(spdfIL$Lpop)
## [1] 3.629 6.718
rng = seq(3, 7, .5)
crq = brewer.pal(9, "Blues")
spdfIL$density = spdfIL$pop/spdfIL$area
spdfIL$Ldensity = log10(spdfIL$density)
range(spdfIL$Ldensity)
## [1] 0.6438 3.3254
rng = seq(.6, 3.6, .6)
crq = brewer.pal(6, "Blues")
labs = as.character(round(10^rng, 0))
spplot(spdfIL, "Ldensity", col = "white", at = rng,
col.regions = crq,
colorkey = list(space = "bottom", labels = labs),
sub = "Population Density (2013) [per square km]")
cor.test(spdfIL$nT, spdfIL$area)
##
## Pearson's product-moment correlation
##
## data: spdfIL$nT and spdfIL$area
## t = 9.68, df = 100, p-value = 4.441e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5794 0.7839
## sample estimates:
## cor
## 0.6955
cor.test(spdfIL$nT, spdfIL$density)
##
## Pearson's product-moment correlation
##
## data: spdfIL$nT and spdfIL$density
## t = 3.16, df = 100, p-value = 0.002089
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1135 0.4683
## sample estimates:
## cor
## 0.3013
nb = poly2nb(spdfIL)
wts = nb2listw(nb)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
spdf.inla.formula = nT ~ Ldensity +
f(ID, model = "besag", graph = tornb.inla)
spdf.inla = inla(formula = spdf.inla.formula, family = "nbinomial", E = area,
data = spdfIL@data, control.predictor = list(compute = TRUE),
control.results = list(return.marginals.random = TRUE),
control.compute = list(config = TRUE, mlik = TRUE, cpo = TRUE,
dic = TRUE, po = TRUE))
summary(spdf.inla)
##
## Call:
## c("inla(formula = spdf.inla.formula, family = \"nbinomial\", data = spdfIL@data, ", " E = area, control.compute = list(config = TRUE, mlik = TRUE, ", " cpo = TRUE, dic = TRUE, po = TRUE), control.predictor = list(compute = TRUE), ", " control.results = list(return.marginals.random = TRUE))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.0809 0.1245 0.0567 0.2621
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -5.002 0.1515 -5.3045 -5.0003 -4.7102 -4.9960 0
## Ldensity 0.336 0.1002 0.1444 0.3343 0.5371 0.3305 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 10.760 2.313
## Precision for ID 148.642 411.556
## 0.025quant 0.5quant
## size for the nbinomial observations (overdispersion) 6.854 10.551
## Precision for ID 10.581 56.553
## 0.975quant mode
## size for the nbinomial observations (overdispersion) 15.883 10.164
## Precision for ID 851.706 20.674
##
## Expected number of effective parameters(std dev): 8.361(4.534)
## Number of equivalent replicates : 12.20
##
## Deviance Information Criterion: 626.15
## Effective number of parameters: 10.63
##
## Marginal Likelihood: -405.04
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
spdfIL$random = spdf.inla$summary.random$ID$mean
spdfIL$fitted = spdf.inla$summary.fitted.values$mean
print(ggplotSpdata(spdfIL, "random", tfun = function(x) round((exp(x) - 1) * 100)))
TornL2.df = as.data.frame(TornL2)
IL.df = subset(TornL2.df, ST == "IL")
IL.df$MLAT = IL.df$SLAT > 40.07
IL.df$NS = ifelse(IL.df$MLAT, "North", "South")
IL.df$MonthABB = factor(month.abb[IL.df$Month], levels = month.abb)
ggplot(IL.df, aes(MonthABB, fill = NS)) +
geom_bar() +
facet_wrap(~NS, nrow = 2) +
theme_bw() +
theme(legend.position = "none") +
xlab("") +
ylab("Number of Tornado Reports in Illinois")
xx = ddply(IL.df, .(Month, MLAT), summarize,
nT = length(OM))
sum(xx$nT[xx$MLAT & xx$Month >= 4 & xx$Month <= 6])
## [1] 367
sum(xx$nT[!xx$MLAT & xx$Month >= 4 & xx$Month <= 6])
## [1] 362
sum(xx$nT[!xx$MLAT & (xx$Month <=3 | xx$Month >= 10)])
## [1] 181
sum(xx$nT[xx$MLAT & (xx$Month <=3 | xx$Month >= 10)])
## [1] 87
sum(xx$nT[!xx$MLAT & (xx$Month <=3 | xx$Month >= 10)])
## [1] 181
87/(87+181)
## [1] 0.3246
sum(xx$nT[xx$MLAT & xx$Month >= 4 & xx$Month <= 9])
## [1] 485
sum(xx$nT[!xx$MLAT & xx$Month >= 4 & xx$Month <= 9])
## [1] 432
485/(485+432)
## [1] 0.5289
MS.sp = gadm[gadm$NAME_1 == "Mississippi", ]
county = tolower(MS.sp$NAME_2)
countiesun.sp = geometry(spChFIDs(MS.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
TornL2 = subset(TornL, Year >= 1954 & EF >= 1)
ct = over(counties.sp, TornL2, returnList = TRUE)
nT = sapply(ct, function(x) nrow(x))
area = rgeos::gArea(counties.sp, byid = TRUE)
Nyears = diff(range(TornL2$Year)) + 1
nTor.df = data.frame(state = "MS", county = county, Nyears = Nyears,
nT = nT, area = area/1000000, rate = nT/area * 10^6 * 10000 / Nyears,
stringsAsFactors = FALSE, ID = 1:length(county))
TracksAll.df = ldply(ct, data.frame)
TracksAll.df$county = rep(county, sapply(ct, nrow))
nTor.df.day = ddply(TracksAll.df, .(county, Year), summarize,
nT = length(county),
nTd = length(unique(Date)), .drop = FALSE)
spdfMS = SpatialPolygonsDataFrame(counties.sp, nTor.df)
# set lat_0 and lon_0 based on Wolfram Alpha center coordinates Mississippi
p4sMS = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=32.8 +lon_0=-89.7 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
spdfMS = spTransform(spdfMS, CRS(p4sMS))
range(spdfMS$nT)
## [1] 6 57
crq = wes.palette(name = "Zissou", type = "continuous")
rng = seq(0, 60, 5)
spplot(spdfMS, "nT", col = "white", at = rng,
col.regions = crq(12),
colorkey = list(space = "bottom", labels = paste(rng)),
sub = "Number of EF1+ Tornadoes (1954-2013)")
print(plotTorn(x = spdfMS, tracks = TracksAll.df) +
scale_fill_gradientn(colours = crq(10)))
cor.test(spdfMS$nT, spdfMS$area)
##
## Pearson's product-moment correlation
##
## data: spdfMS$nT and spdfMS$area
## t = 5.326, df = 80, p-value = 9.005e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3314 0.6558
## sample estimates:
## cor
## 0.5116
pop = read.csv("PEP_2013_PEPANNRES_MS.csv", stringsAsFactors = FALSE, header = TRUE)
pop = pop[-1, ]
pop[c(33, 32), ] = pop[c(32, 33), ]
#spdfMS$pop = processCity(pop, field = "respop72012")
ctys = sapply(strsplit(pop$GEO.display.label, ","), function(x) x[1])
ctys2 = tolower(sapply(strsplit(ctys, " "), function(x) x[1]))
pop = pop[order(ctys2), ]
all(tolower(substring(pop$GEO.display.label, 1, nchar(spdfMS$county))) == spdfMS$county)
## [1] TRUE
spdfMS$pop = as.numeric(pop$respop72012)
spdfMS$Lpop = log10(spdfMS$pop)
sort(spdfMS$pop)
## [1] 1393 4805 7659 7798 7912 8333 8713 9207 9377 9457
## [11] 10074 10379 10439 10510 10596 11210 12089 12095 12402 12557
## [21] 12966 14300 14842 15103 15106 16366 16539 16554 17443 18060
## [31] 18743 19011 19107 19566 19615 20428 20656 21568 21613 21925
## [41] 22885 23289 23353 25306 25667 26395 27345 27398 28220 28260
## [51] 28442 28499 28964 29698 30498 31565 32145 34078 34427 34896
## [61] 36422 36545 37228 40118 45266 48108 48992 50078 50502 55248
## [71] 57739 59698 68582 76925 80258 84960 98555 140039 145173 166317
## [81] 193702 248149
spdfMS$county[order(spdfMS$pop)]
## [1] "issaquena" "sharkey" "jefferson"
## [4] "quitman" "franklin" "choctaw"
## [7] "benton" "humphreys" "claiborne"
## [10] "wilkinson" "webster" "kemper"
## [13] "carroll" "tunica" "montgomery"
## [16] "noxubee" "jefferson davis" "perry"
## [19] "yalobusha" "lawrence" "amite"
## [22] "greene" "calhoun" "walthall"
## [25] "tallahatchie" "smith" "clarke"
## [28] "jasper" "chickasaw" "stone"
## [31] "holmes" "winston" "attala"
## [34] "covington" "tishomingo" "clay"
## [37] "wayne" "newton" "grenada"
## [40] "tippah" "george" "leake"
## [43] "itawamba" "prentiss" "coahoma"
## [46] "marion" "simpson" "union"
## [49] "yazoo" "scott" "sunflower"
## [52] "tate" "copiah" "neshoba"
## [55] "pontotoc" "leflore" "adams"
## [58] "bolivar" "panola" "lincoln"
## [61] "monroe" "marshall" "alcorn"
## [64] "pike" "hancock" "warren"
## [67] "oktibbeha" "washington" "lafayette"
## [70] "pearl river" "lamar" "lowndes"
## [73] "jones" "forrest" "lauderdale"
## [76] "lee" "madison" "jackson"
## [79] "rankin" "desoto" "harrison"
## [82] "hinds"
cor.test(spdfMS$nT, spdfMS$pop)
##
## Pearson's product-moment correlation
##
## data: spdfMS$nT and spdfMS$pop
## t = 5.433, df = 80, p-value = 5.818e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3405 0.6616
## sample estimates:
## cor
## 0.5192
range(spdfMS$Lpop)
## [1] 3.144 5.395
rng = seq(3, 6, .5)
crq = brewer.pal(9, "Blues")
spdfMS$density = spdfMS$pop/spdfMS$area
spdfMS$Ldensity = log10(spdfMS$density)
range(spdfMS$Ldensity)
## [1] 0.08499 2.11568
rng = seq(.08, 2.5, .4)
crq = brewer.pal(7, "Blues")
labs = as.character(round(10^rng, 0))
spplot(spdfMS, "Ldensity", col = "white", at = rng,
col.regions = crq,
colorkey = list(space = "bottom", labels = labs),
sub = "Population Density (2013) [people per square km]")
cor.test(spdfMS$nT, spdfMS$area)
##
## Pearson's product-moment correlation
##
## data: spdfMS$nT and spdfMS$area
## t = 5.326, df = 80, p-value = 9.005e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3314 0.6558
## sample estimates:
## cor
## 0.5116
cor.test(spdfMS$nT, spdfMS$density)
##
## Pearson's product-moment correlation
##
## data: spdfMS$nT and spdfMS$density
## t = 3.604, df = 80, p-value = 0.0005431
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1706 0.5464
## sample estimates:
## cor
## 0.3737
nb = poly2nb(spdfMS)
wts = nb2listw(nb)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
spdf.inla.formula = nT ~ Ldensity +
f(ID, model = "besag", graph = tornb.inla)
spdf.inla = inla(formula = spdf.inla.formula, family = "nbinomial", E = area,
data = spdfMS@data, control.predictor = list(compute = TRUE),
control.results = list(return.marginals.random = TRUE),
control.compute = list(config = TRUE, mlik = TRUE, cpo = TRUE,
dic = TRUE, po = TRUE))
summary(spdf.inla)
##
## Call:
## c("inla(formula = spdf.inla.formula, family = \"nbinomial\", data = spdfMS@data, ", " E = area, control.compute = list(config = TRUE, mlik = TRUE, ", " cpo = TRUE, dic = TRUE, po = TRUE), control.predictor = list(compute = TRUE), ", " control.results = list(return.marginals.random = TRUE))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.0809 0.1113 0.0536 0.2458
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -4.7792 0.1747 -5.1234 -4.7790 -4.4364 -4.779 0
## Ldensity 0.4122 0.1363 0.1453 0.4118 0.6811 0.411 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 9.784 2.254
## Precision for ID 28.311 34.084
## 0.025quant 0.5quant
## size for the nbinomial observations (overdispersion) 5.998 9.579
## Precision for ID 5.287 18.047
## 0.975quant mode
## size for the nbinomial observations (overdispersion) 14.802 9.207
## Precision for ID 113.588 9.822
##
## Expected number of effective parameters(std dev): 12.79(5.601)
## Number of equivalent replicates : 6.41
##
## Deviance Information Criterion: 560.38
## Effective number of parameters: 14.51
##
## Marginal Likelihood: -359.45
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
spdfMS$random = spdf.inla$summary.random$ID$mean
spdfMS$fitted = spdf.inla$summary.fitted.values$mean
print(ggplotSpdata(spdfMS, "random", tfun = function(x) round((exp(x) - 1) * 100)))
KS.sp = gadm[gadm$NAME_1 == "Kansas", ]
county = tolower(KS.sp$NAME_2)
countiesun.sp = geometry(spChFIDs(KS.sp, county))
counties.sp = spTransform(countiesun.sp, CRS(proj4string(TornL)))
download.file(url = "http://www.viewfinderpanoramas.org/DEM/TIF15/15-H.zip",
destfile = "15-H.ZIP", mode = "wb")
unzip("15-H.ZIP")
Elev = raster("15-H.tif")
Elev1 = as(crop(Elev, extent(countiesun.sp)), "SpatialGridDataFrame")
proj4string(Elev1) = proj4string(countiesun.sp) #same projection, different order of parameters
## Warning: A new CRS was assigned to an object with an existing CRS:
## +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
## without reprojecting.
## For reprojection, use function spTransform in package rgdal
Elev.data = over(countiesun.sp, Elev1, returnList = TRUE)
Elev.df = data.frame(county = rep(county, sapply(Elev.data,nrow)),
Elev = unlist(Elev.data), stringsAsFactors = FALSE)
CE.df = ddply(Elev.df, .(county), summarize,
elev = mean(Elev, na.rm = TRUE),
elevS = sd(Elev, na.rm = TRUE),
elevK = kurtosis(Elev, na.rm = TRUE),
elevCV = elevS/elev)
all(spdf$county == CE.df$county)
## [1] TRUE
spdf@data[colnames(CE.df)] = CE.df
mycolors = div_gradient_pal(low = muted("blue"),
mid = "white",
high = muted("orange"),
space = "rgb")(seq(0, 1, length = 104))
t1 = list("sp.lines", as(KS.sp, "SpatialLines"))
range(Elev1@data)
## [1] 211 1227
spplot(Elev1, col.regions = mycolors,
scales = list(draw = TRUE),
at = seq(211, 1227, length = 104),
sp.layout = t1,
colorkey = list(space = "bottom"),
sub = "Elevation (m)")
range(spdf$elevS)
## [1] 11.34 73.53
rng = seq(10, 80, 10)
crq = brewer.pal(7, "YlOrBr")
spplot(spdf, "elevS", col = "white", at = rng,
col.regions = crq,
colorkey = list(space = "bottom"),
sub = "Elevation Standard Deviation [m]")
cor.test(spdf$nT, spdf$elevS)
##
## Pearson's product-moment correlation
##
## data: spdf$nT and spdf$elevS
## t = -2.201, df = 103, p-value = 0.02998
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.38784 -0.02112
## sample estimates:
## cor
## -0.2119
nb = poly2nb(spdf)
wts = nb2listw(nb)
nb2INLA("tornb.inla", nb)
tornb.inla = inla.read.graph("tornb.inla")
spdf.inla.formula = nT ~ elevS + Ldensity +
f(ID, model = "besag", graph = tornb.inla)
spdf.inla = inla(formula = spdf.inla.formula, family = "nbinomial", E = area,
data = spdf@data, control.predictor = list(compute = TRUE),
control.results = list(return.marginals.random = TRUE),
control.compute = list(config = TRUE, mlik = TRUE, cpo = TRUE,
dic = TRUE, po = TRUE))
summary(spdf.inla)
##
## Call:
## c("inla(formula = spdf.inla.formula, family = \"nbinomial\", data = spdf@data, ", " E = area, control.compute = list(config = TRUE, mlik = TRUE, ", " cpo = TRUE, dic = TRUE, po = TRUE), control.predictor = list(compute = TRUE), ", " control.results = list(return.marginals.random = TRUE))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.0835 0.2443 0.0549 0.3827
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -4.3926 0.1868 -4.7600 -4.3926 -4.0258 -4.3925 0
## elevS -0.0151 0.0048 -0.0246 -0.0151 -0.0056 -0.0151 0
## Ldensity 0.2392 0.0979 0.0477 0.2389 0.4325 0.2382 0
##
## Random effects:
## Name Model
## ID Besags ICAR model
##
## Model hyperparameters:
## mean sd
## size for the nbinomial observations (overdispersion) 10.259 2.332
## Precision for ID 20.551 20.803
## 0.025quant 0.5quant
## size for the nbinomial observations (overdispersion) 6.317 10.056
## Precision for ID 4.755 14.221
## 0.975quant mode
## size for the nbinomial observations (overdispersion) 15.431 9.690
## Precision for ID 74.281 8.411
##
## Expected number of effective parameters(std dev): 18.34(7.344)
## Number of equivalent replicates : 5.724
##
## Deviance Information Criterion: 698.61
## Effective number of parameters: 20.22
##
## Marginal Likelihood: -455.58
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
spdf$random2 = spdf.inla$summary.random$ID$mean
ggplotmargin(spdf.inla, type = "fixed", effect = "elevS",
xlab = "Elevation Standard Deviation Coefficient") +
theme(title = element_text(size = 20, face = "bold"))
temp = inla.posterior.sample(n = 10000, spdf.inla)
## Warning: inla.posterior.sample: THIS FUNCTION IS EXPERIMENTAL!!!
temp1 = sapply(temp, function(x) x$latent["elevS.1", 1] * 100)
df1 = as.data.frame(temp1)
ggplot(df1, aes(temp1)) +
geom_histogram(binwidth = .05, fill = 'gray') +
geom_vline(xintercept = 0, color = "red") +
ylab("Number of Posterior Samples") +
xlab("Elevation Standard Deviation Coefficient\n[Tornadoes/10,000 sq km/century per m]") +
theme_bw()
sum(df1$temp1 < 0)/length(df1$temp1) * 100
## [1] 99.95
quantile(df1$temp1, probs = c(.025, .975))
## 2.5% 97.5%
## -2.4634 -0.5467
rm(list=c("temp", "temp1"))
hiWays = read.table("MajorHiWays_KS.txt", header = TRUE,
stringsAsFactors = FALSE)
spdf$highway = hiWays$highway[order(hiWays$county)]