Set working directory setWD() Load all required libraries {r echo=TRUE, message=FALSE}
require('knitr')
require('kableExtra')
require('lubridate')
require('R.utils')
require('data.table')
require('dplyr')
require('plyr')
require('Hmisc')
require('ggplot2')
require('forcats')
require('gridExtra')
url<-'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2'
localBZFile<-'repdata%2Fdata%2FStormData.csv.bz2'
localFile<-'repdata_Fdata_FStormData.csv'
localfrFile<-'file://repdata_Fdata_FStormData.csv'
if (!file.exists(localBZFile)) {
download.file(url, destfile = localBZFile, method='curl')
}
# remove - If TRUE, the input file is removed afterward, otherwise not.
# Skip - If TRUE and the output file already exists, the output file is returned as is.
bunzip2(filename=localBZFile, destname = localFile, remove=FALSE, skip=TRUE )
## [1] "repdata_Fdata_FStormData.csv"
## attr(,"temporary")
## [1] FALSE
df<-fread(localFile, header = TRUE, select = c(7:8,23:28), strip.white=T)
##
Read 79.6% of 967216 rows
Read 902297 rows and 8 (of 37) columns from 0.523 GB file in 00:00:03
head(df)
## STATE EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1: AL TORNADO 0 15 25.0 K 0
## 2: AL TORNADO 0 0 2.5 K 0
## 3: AL TORNADO 0 2 25.0 K 0
## 4: AL TORNADO 0 2 2.5 K 0
## 5: AL TORNADO 0 2 2.5 K 0
## 6: AL TORNADO 0 6 2.5 K 0
Set type of selected columns on original data. require(data.table)
df <- mutate_all(df, funs(toupper))
df$STATE<-as.factor(df$STATE)
df$FATALITIES <-as.numeric(df$FATALITIES)
df$INJURIES <-as.numeric(df$INJURIES)
df$PROPDMGEXP <- as.factor(df$PROPDMGEXP)
df$CROPDMGEXP <- as.factor(df$CROPDMGEXP)
Select rows with valid factors. Eliminate invalid cost factors
df <- df[ df$PROPDMGEXP %in% c('B','H','K','M',''), ]
df <- df[ df$CROPDMGEXP %in% c('B','K','M','') , ]
Prep for costs. Make column with numbers according to cost code. require(dplyr)
df$PROPDMGEXPVAL <- as.numeric(mapvalues(df$PROPDMGEXP,
from=c('B','H','K','M',''),
to=c('1000000000','100','1000','1000000','1')
))
df$CROPDMGEXPVAL <- as.numeric(mapvalues(df$CROPDMGEXP,
from=c('B','K','M',''),
to=c('1000000000','1000','1000000','1')
))
Set type for created columns
df$PROPDMG <- as.numeric(df$PROPDMG, coerce=T)
df$CROPDMG <- as.numeric(df$CROPDMG, coerce=T)
df$PROPDMGEXPPLIER <- as.numeric( df$PROPDMGEXPVAL, coerce=T)
df$CROPDMGEXPPLIER <- as.numeric( df$CROPDMGEXPVAL, coerce=T)
df$DGMCOSTTOTAL <- df$PROPDMG*df$PROPDMGEXPPLIER + df$CROPDMG*df$CROPDMGEXPPLIER
Eliminate rows with no FATALITIES, INJURIES, DAMAGE. Eliminate zero costs and losses.
df<-df[df$DGMCOSTTOTAL > 0,]
Standardize the names some what by eliminating spurious characters. require(plyr)
df$EVTYPE <- gsub('[[:punct:]]',' ',df$EVTYPE)
df$EVTYPE <- gsub('[0-9/-@()]',' ',df$EVTYPE)
df$EVTYPE <- gsub('([\\])',' ',df$EVTYPE)
df$EVTYPE <- gsub(' ',' ',df$EVTYPE)
df$EVTYPE <- gsub('MPH',' ',df$EVTYPE)
df$EVTYPE <- gsub('WIND G', 'WIND',df$EVTYPE)
df$EVTYPE <- gsub('(?<!S)S\\b','', df$EVTYPE, perl=T)
df$EVTYPE <- gsub('FLOODING','FLOOD',df$EVTYPE)
df$EVTYPE <- gsub('FLOOD FLASH FLOOD','FLASH FLOOD',df$EVTYPE)
df$EVTYPE <- gsub('HURRICANE EMILY','HURRICANE',df$EVTYPE)
df$EVTYPE <- gsub('HURRICANE ERIN','HURRICANE',df$EVTYPE)
df$EVTYPE <- gsub('HURRICANE FELIX','HURRICANE',df$EVTYPE)
df$EVTYPE <- gsub('HURRICANE EMILY','HURRICANE',df$EVTYPE)
df$EVTYPE <- gsub('HURRICANE ERIN','HURRICANE',df$EVTYPE)
df$EVTYPE <- gsub('HURRICANE FELIX','HURRICANE',df$EVTYPE)
df$EVTYPE <- gsub('HURRICANE GORDON','HURRICANE',df$EVTYPE)
df$EVTYPE <- gsub('HURRICANE OPAL','HURRICANE',df$EVTYPE)
df$EVTYPE <- gsub('HURRICANE TYPHOON','HURRICANE',df$EVTYPE)
df$EVTYPE <- gsub('TORNADO F','TORNADO',df$EVTYPE)
df$EVTYPE <- gsub('TORNDAO','TORNADO',df$EVTYPE)
df$EVTYPE <- gsub('TORNADOE','TORNADO',df$EVTYPE)
df$EVTYPE <- gsub('THUNDERSTORMWIND','THUNDERSTORM WIND',df$EVTYPE)
df$EVTYPE <- gsub('THUNDERSTORMW','THUNDERSTORM WIND',df$EVTYPE)
df$EVTYPE <- gsub('THUNDERTORM','THUNDERSTORM',df$EVTYPE)
df$EVTYPE <- gsub('THUNDEERSTORM','THUNDERSTORM',df$EVTYPE)
df$EVTYPE <- gsub('TSTM','THUNDERSTORM',df$EVTYPE)
df$EVTYPE <- gsub('TROPICAL STORM ALBERT','TROPICAL STORM',df$EVTYPE)
df$EVTYPE <- gsub('TROPICAL STORM DEAN','TROPICAL STORM',df$EVTYPE)
df$EVTYPE <- gsub('TROPICAL STORM GORDON','TROPICAL STORM',df$EVTYPE)
df$EVTYPE <- gsub('TROPICAL STORM JERRY','TROPICAL STORM',df$EVTYPE)
df$EVTYPE <- gsub('WILDFIRE','WILD FIRE',df$EVTYPE)
df$EVTYPE <- gsub('WILD FOREST FIRE','WILD FIRE',df$EVTYPE)
df$EVTYPE <- trimws(df$EVTYPE)
df$EVTYPE <- as.factor(df$EVTYPE)
Post processing to this point, There are 191 terms. Some are adjectives describing event, not really event types. Others are nouns that could be considered events types. Some lexical priocessing could refine the results.
unique(df[order(df$EVTYPE),]$EVTYPE)
## [1] AGRICULTURAL FREEZE
## [3] APACHE COUNTY ASTRONOMICAL HIGH TIDE
## [5] ASTRONOMICAL LOW TIDE AVALANCHE
## [7] BEACH EROSION BLIZZARD
## [9] BLIZZARD WINTER STORM BLOWING DUST
## [11] BLOWING SNOW BRUSH FIRE
## [13] COASTAL EROSION COASTAL FLOOD
## [15] COASTAL FLOOD EROSION COASTAL STORM
## [17] COASTAL SURGE COLD
## [19] COLD AIR TORNADO COLD AND WET CONDITION
## [21] COLD WIND CHILL COOL AND WET
## [23] DAM BREAK DAMAGING FREEZE
## [25] DENSE FOG DENSE SMOKE
## [27] DOWNBURST DROUGHT
## [29] DROUGHT EXCESSIVE HEAT DRY MICROBURST
## [31] DUST DEVIL DUST DEVIL WATERSPOUT
## [33] DUST STORM DUST STORM HIGH WIND
## [35] EARLY FROST EROSION CSTL FLOOD
## [37] EXCESSIVE HEAT EXCESSIVE SNOW
## [39] EXCESSIVE WETNESS EXTENDED COLD
## [41] EXTREME COLD EXTREME COLD WIND CHILL
## [43] EXTREME HEAT EXTREME WIND CHILL
## [45] EXTREME WINDCHILL FLASH FLOOD
## [47] FLASH FLOOD HEAVY RAIN FLASH FLOOD FLOOD
## [49] FLASH FLOOD FROM ICE JAM FLASH FLOOD LANDSLIDE
## [51] FLASH FLOOD STREET FLASH FLOOD THUNDERSTORM WI
## [53] FLOOD FLOOD HEAVY RAIN
## [55] FLOOD FLASH FLOOD FLASHFLOOD
## [57] FLOOD RAIN WIND FLOOD RIVER FLOOD
## [59] FOG FOREST FIRE
## [61] FREEZE FREEZING DRIZZLE
## [63] FREEZING FOG FREEZING RAIN
## [65] FREEZING RAIN SLEET FREEZING RAIN SNOW
## [67] FROST FROST FREEZE
## [69] FUNNEL CLOUD GLAZE
## [71] GLAZE ICE GRADIENT WIND
## [73] GRASS FIRE GROUND BLIZZARD
## [75] GUSTNADO GUSTY WIND
## [77] GUSTY WIND HAIL GUSTY WIND HVY RAIN
## [79] GUSTY WIND RAIN HAIL
## [81] HAIL DAMAGE HAIL WIND
## [83] HAILSTORM HARD FREEZE
## [85] HEAT HEAT WAVE
## [87] HEAT WAVE DROUGHT HEAVY LAKE SNOW
## [89] HEAVY MIX HEAVY PRECIPITATION
## [91] HEAVY RAIN HEAVY RAIN AND FLOOD
## [93] HEAVY RAIN FLOOD HEAVY RAIN HIGH SURF
## [95] HEAVY RAIN LIGHTNING HEAVY RAIN SEVERE WEATHER
## [97] HEAVY RAIN SMALL STREAM URBAN HEAVY RAIN SNOW
## [99] HEAVY SHOWER HEAVY SNOW
## [101] HEAVY SNOW AND STRONG WIND HEAVY SNOW BLIZZARD
## [103] HEAVY SNOW BLIZZARD AVALANCHE HEAVY SNOW FREEZING RAIN
## [105] HEAVY SNOW HIGH WIND FLOOD HEAVY SNOW ICE
## [107] HEAVY SNOW SHOWER HEAVY SNOW SQUALL
## [109] HEAVY SNOW WIND HEAVY SNOW WINTER STORM
## [111] HEAVY SNOWPACK HEAVY SURF
## [113] HEAVY SURF COASTAL FLOOD HEAVY SURF HIGH SURF
## [115] HEAVY SWELL HIGH SEA
## [117] HIGH SURF HIGH SURF ADVISORY
## [119] HIGH SWELL HIGH WATER
## [121] HIGH WIND HIGH WIND BLIZZARD
## [123] HIGH WIND COASTAL FLOOD HIGH WIND COLD
## [125] HIGH WIND DAMAGE HIGH WIND HEAVY RAIN
## [127] HIGH WIND HEAVY SNOW HIGH WIND SEA
## [129] HIGH WIND SNOW HURRICANE
## [131] HURRICANE GENERATED SWELL HURRICANE HIGH WIND
## [133] HVY RAIN ICE
## [135] ICE AND SNOW ICE FLOE
## [137] ICE JAM ICE JAM FLOOD
## [139] ICE JAM FLOOD MINOR ICE ROAD
## [141] ICE STORM ICE STRONG WIND
## [143] ICY ROAD LAKE EFFECT SNOW
## [145] LAKE FLOOD LAKESHORE FLOOD
## [147] LANDSLIDE LANDSLUMP
## [149] LANDSPOUT LATE SEASON SNOW
## [151] LIGHT FREEZING RAIN LIGHT SNOW
## [153] LIGHT SNOWFALL LIGHTING
## [155] LIGHTNING LIGHTNING AND HEAVY RAIN
## [157] LIGHTNING FIRE LIGHTNING HEAVY RAIN
## [159] LIGHTNING THUNDERSTORM WIND LIGHTNING WAUSEON
## [161] LIGNTNING MAJOR FLOOD
## [163] MARINE ACCIDENT MARINE HAIL
## [165] MARINE HIGH WIND MARINE STRONG WIND
## [167] MARINE THUNDERSTORM WIND MICROBURST
## [169] MICROBURST WIND MINOR FLOOD
## [171] MIXED PRECIPITATION MUD SLIDE
## [173] MUD SLIDE URBAN FLOOD MUDSLIDE
## [175] NON SEVERE WIND DAMAGE NON THUNDERSTORM WIND
## [177] OTHER RAIN
## [179] RAINSTORM RECORD COLD
## [181] RECORD RAINFALL RECORD SNOW
## [183] RIP CURRENT RIVER AND STREAM FLOOD
## [185] RIVER FLOOD ROCK SLIDE
## [187] ROUGH SURF RURAL FLOOD
## [189] SEICHE SEVERE THUNDERSTORM
## [191] SEVERE THUNDERSTORM WIND SEVERE TURBULENCE
## [193] SLEET ICE STORM SMALL HAIL
## [195] SMALL STREAM FLOOD SNOW
## [197] SNOW ACCUMULATION SNOW AND HEAVY SNOW
## [199] SNOW AND ICE SNOW AND ICE STORM
## [201] SNOW BITTER COLD SNOW BLOWING SNOW
## [203] SNOW COLD SNOW FREEZING RAIN
## [205] SNOW HEAVY SNOW SNOW HIGH WIND
## [207] SNOW ICE SNOW ICE STORM
## [209] SNOW SLEET SNOW SLEET FREEZING RAIN
## [211] SNOW SQUALL SNOWMELT FLOOD
## [213] STORM FORCE WIND STORM SURGE
## [215] STORM SURGE TIDE STRONG WIND
## [217] THUDERSTORM WIND THUNDERESTORM WIND
## [219] THUNDERSNOW THUNDERSTORM
## [221] THUNDERSTORM DAMAGE TO THUNDERSTORM HAIL
## [223] THUNDERSTORM WIN THUNDERSTORM WIND
## [225] THUNDERSTORM WIND G THUNDERSTORM WIND AND
## [227] THUNDERSTORM WIND AND LIGHTNING THUNDERSTORM WIND AWNING
## [229] THUNDERSTORM WIND DAMAGE THUNDERSTORM WIND FLOOD
## [231] THUNDERSTORM WIND FUNNEL CLOU THUNDERSTORM WIND G
## [233] THUNDERSTORM WIND HAIL THUNDERSTORM WIND LIGHTNING
## [235] THUNDERSTORM WIND TREE THUNDERSTORM WINDSHAIL
## [237] THUNDERSTORM WINDSS THUNDERSTORMW
## [239] THUNDERSTROM WIND THUNERSTORM WIND
## [241] TIDAL FLOOD TORNADO
## [243] TORNADO THUNDERSTORM WIND HAIL TROPICAL DEPRESSION
## [245] TROPICAL STORM TROPICAL STORMO
## [247] TSUNAMI TUNDERSTORM WIND
## [249] TYPHOON UNSEASONABLE COLD
## [251] UNSEASONABLY COLD UNSEASONABLY WARM
## [253] UNSEASONAL RAIN URBAN AND SMALL
## [255] URBAN FLOOD URBAN SMALL
## [257] URBAN SMALL STREAM URBAN SMALL STREAM FLOOD
## [259] URBAN SML STREAM FLD VOLCANIC ASH
## [261] WATERSPOUT WATERSPOUT TORNADO
## [263] WET MICROBURST WHIRLWIND
## [265] WILD FIRE WIND
## [267] WIND AND WAVE WIND DAMAGE
## [269] WIND HAIL WIND STORM
## [271] WINTER STORM WINTER STORM HIGH WIND
## [273] WINTER WEATHER WINTER WEATHER MIX
## [275] WINTRY MIX
## 275 Levels: AGRICULTURAL FREEZE APACHE COUNTY ... WINTRY MIX
Aggregate the data sum losses by Type
tf<-aggregate(FATALITIES ~ EVTYPE, df, sum)
ti<-aggregate(INJURIES ~ EVTYPE, df, sum)
tc<-aggregate(DGMCOSTTOTAL ~ EVTYPE, df, sum)
In the derived data frames, set as factors
tc$EVTYPE<-as.factor(tc$EVTYPE)
tf$EVTYPE<-as.factor(tf$EVTYPE)
ti$EVTYPE<-as.factor(ti$EVTYPE)
Sort decending
tc <- tc[order(-tc[,2]),]
tf <- tf[order(-tf[,2]),]
ti <- ti[order(-ti[,2]),]
Select the top 20 factors for coverage and correlation
tbllen <- 10
tch <- head(tc, tbllen)
tfh <- head(tf, tbllen)
tih <- head(ti, tbllen)
tfh5all <- data.frame( 'No.' = 1:tbllen,
'Damage Event.Type' = tch$EVTYPE,
'Damage Cost' = as.integer(tch$DGMCOSTTOTAL),
'Fatality Event.Type' = tfh$EVTYPE ,
'Fatalities' = as.integer(tfh$FATALITIES),
'Injury Event.Type' = tih$EVTYPE,
'Injuries' = as.integer(tih$INJURIES) )
Make a data frame for results presentation Fill using the total data set, not just top 20, create with tch, tfh, tih, merge with tc,tf,ti.
dfc<-data.frame('EVENT.TYPE'=as.factor(union(union(tch$EVTYPE,tfh$EVTYPE),tih$EVTYPE)))
dfc<-merge(dfc,tc, by.x='EVENT.TYPE', by.y='EVTYPE' , all.x=T)
dfc<-merge(dfc,tf, by.x='EVENT.TYPE', by.y='EVTYPE' , all.x=T)
dfc<-merge(dfc,ti, by.x='EVENT.TYPE', by.y='EVTYPE' , all.x=T)
names(dfc)<-c('EVENT.TYPE','DAMAGE.TOTAL.COST','FATALITIES','INJURIES')
#dfc[order(-dfc$DAMAGE.TOTAL.COST),]
#dfc[order(-dfc$FATALITIES),]
How correlated are the columns ? Zero Fill. require(Hmisc)
corfc <- cor(dfc$FATALITIES, dfc$DAMAGE.TOTAL.COST)
corfi <- cor(dfc$FATALITIES, dfc$INJURIES)
corci <- cor(dfc$INJURIES, dfc$DAMAGE.TOTAL.COST)
dfcor <- data.frame( 'Impact' = c('DAMAGE', 'FATALITIES','INJURIES'),
'DAMAGE' = c(0, corfc, corci),
'FATALITIES' = c(corfc, 0, corfi),
'INJURIES' = c(corci, corfi, 0)
)
This means that for the selected items, the correlation between fatalities and injuries is almost 1:1. The correlation between costs and (fatalities / injuries) is about 0.7 : 1
Of recorded total impact, versus the 20 selected, How much of total is the to 20? That is the coverage
tctotalcount<-sum(tc$DGMCOSTTOTAL)
tcheadcount<-sum(tch$DGMCOSTTOTAL )
tccoverage <- tcheadcount/tctotalcount
tftotalcount<-sum(tf$FATALITIES)
tfheadcount<-sum(tfh$FATALITIES)
tfcoverage <-tfheadcount/tftotalcount
titotalcount<-sum(ti$INJURIES)
tiheadcount<-sum(tih$INJURIES)
ticoverage <-tiheadcount/titotalcount
dfcoverage <- data.frame( 'Metric'= c('Total Count', 'Summary Count', 'Coverage'),
'Damage' = c(tctotalcount, tcheadcount, tccoverage),
'Fatalities' = c( tftotalcount, tfheadcount, tfcoverage),
'Injuries' = c( titotalcount, tiheadcount, ticoverage))
require(‘ggplot2’), require(‘forcats’)
gf.threshold<-dfc[order(-dfc$FATALITIES),][11,3]
gf <- ggplot(dfc[dfc$FATALITIES > gf.threshold,],
aes(x=fct_reorder(EVENT.TYPE,FATALITIES, .desc=F), y=FATALITIES))
gf <- gf + geom_bar(stat='identity')
gf <- gf + labs( x='EVENT TYPE', y='', title='FATALITIES')
gf <- gf + geom_text(aes(label=FATALITIES), hjust = -0.25)
gf <- gf + scale_y_continuous(labels= scales::comma,
limits=c(0, 94000 ),
expand = c(0.0, 0))
gf <- gf + theme(
plot.title = element_text(hjust = 0.5),
axis.text.y = element_text(angle = 0, hjust = 1))
gf <- gf + coord_flip()
#gf
gi.threshold<-dfc[order(-dfc$INJURIES),][11,4]
gi<-ggplot(dfc[dfc$INJURIES > gi.threshold,],
aes(x=fct_reorder(EVENT.TYPE,INJURIES, .desc=F), y=INJURIES))
gi <- gi + geom_bar(stat='identity')
gi <- gi + labs( x='EVENT TYPE', y='', title='INJURIES')
gi <- gi + geom_text(aes(label=INJURIES), hjust= -0.25)
gi <- gi + scale_y_continuous(labels= scales::comma,
limits=c(0, 94000 ),
expand = c(0.0, 0))
gi <- gi + theme(
plot.title = element_text(hjust = 0.5),
axis.text.y = element_text(angle = 0, hjust = 1))
gi <- gi + coord_flip()
#gi
grid.arrange(gf,gi,
#top = "NOAA Weather: Health Impact",
nrow=2,
ncol=1,
bottom = "Note: scales are the same"
)
dfc$DAMAGE.TOTAL.COST.1000 <-dfc$DAMAGE.TOTAL.COST/1000
gc.threshold<-dfc[order(-dfc$DAMAGE.TOTAL.COST.1000),][11,5]
gc<-ggplot(dfc[dfc$DAMAGE.TOTAL.COST.1000 > gc.threshold,],
aes(x=fct_reorder(EVENT.TYPE, DAMAGE.TOTAL.COST.1000, .desc=F), y=DAMAGE.TOTAL.COST.1000))
gc <- gc + geom_bar(stat='identity')
gc <- gc + labs( x='EVENT TYPE', y=' ($ 000)', title='Economic TOTAL COST')
gc <- gc + geom_text(
aes(label=sprintf('%0.0f',DAMAGE.TOTAL.COST.1000)), hjust= -0.25)
gc <- gc + scale_y_continuous(labels= scales::comma,
limits=c(0, 56000 ),
expand = c(0.0, 0))
gc <- gc + theme(
plot.title = element_text(hjust = 0.5),
axis.text.y = element_text(angle = 0, hjust = 1))
gc <- gc + coord_flip()
#gc
grid.arrange(gc,
#top = "NOAA Weather: Economic Cost",
nrow=2,
ncol=1,
heights=c(.5,.5))
tfh5all<- format(tfh5all, scientific=FALSE)
tfh5all %>%
kable( 'html',
align=c('c','l','r','l','r','l','r')) %>%
kable_styling("striped", "bordered") %>%
add_header_above(c(" " = 1, "Damage" = 2, "Fatalities" = 2, "Injuries" = 2)) %>%
add_header_above(c(" " = 1, "Economic" = 2, "Health" = 4))
| No. | Damage.Event.Type | Damage.Cost | Fatality.Event.Type | Fatalities | Injury.Event.Type | Injuries |
|---|---|---|---|---|---|---|
| 1 | TORNADO | 48858496 | TORNADO | 5078 | TORNADO | 83632 |
| 2 | THUNDERSTORM WIND | 40996649 | FLASH FLOOD | 632 | FLOOD | 6736 |
| 3 | FLASH FLOOD | 23161714 | FLOOD | 362 | THUNDERSTORM WIND | 4546 |
| 4 | FLOOD | 14651262 | THUNDERSTORM WIND | 301 | ICE STORM | 1842 |
| 5 | HAIL | 13821333 | HIGH WIND | 171 | FLASH FLOOD | 1521 |
| 6 | LIGHTNING | 9069597 | HURRICANE | 107 | HURRICANE | 1322 |
| 7 | HIGH WIND | 5823688 | WINTER STORM | 90 | WILD FIRE | 1203 |
| 8 | WINTER STORM | 2014740 | WILD FIRE | 73 | HIGH WIND | 1096 |
| 9 | WILD FIRE | 1937840 | BLIZZARD | 63 | WINTER STORM | 1021 |
| 10 | HEAVY SNOW | 1847692 | EXCESSIVE HEAT | 56 | HEAT | 852 |
Divide that impact by total impact to get percent of impact covered by top event types. The analysis calls this ‘Coverage’.
dfcoverage<- format(dfcoverage, digits=2, nsmall=2, scientific=FALSE)
#str(dfcoverage)
dfcoverage%>%
kable( 'html' ) %>%
kable_styling("striped", "bordered")
| Metric | Damage | Fatalities | Injuries |
|---|---|---|---|
| Total Count | 171569512.76 | 7680.00 | 109556.00 |
| Summary Count | 162183015.79 | 6933.00 | 103771.00 |
| Coverage | 0.95 | 0.90 | 0.95 |
The analysis then considers correlation across event types. THe meaning for event types that are high impact are they high impact for all three categories, Damage costs, Fatalities, and injuries ?
dfcor<-format(dfcor, digits=3, nsmall=3, scientific=FALSE)
#str(dfcor)
dfcor %>%
kable( 'html') %>%
kable_styling("striped", "bordered")
| Impact | DAMAGE | FATALITIES | INJURIES |
|---|---|---|---|
| DAMAGE | 0.000 | 0.736 | 0.711 |
| FATALITIES | 0.736 | 0.000 | 0.994 |
| INJURIES | 0.711 | 0.994 | 0.000 |
Fatalities and Injuries very closely correlate. Property damage has less corrleation with Fatalities and Injuries.
The results seem to follow intuition. Further refinement is possible by consolidating the event types to a logical set. The analysis did not do this because the supporting documentation did not make it clear and the actual data did not match expectation from the document. A custom mapping would need to be made. Refinement here may not change the dominance of tornados on overall impact.