1. Data Processing
1.1 Downloading and reading
The data is retrieved from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database and is read into R with the read.csv
function. The analyses that follow use the complete dataset.
# Load the raw CSV data into R.
# First step: make sure the local "./data" folder exists, creating it if it
# is missing.
if (!file.exists("./data")) dir.create("./data")
Warning messages:
1: Unknown or uninitialised column: 'PROPDMGEXPfactor'.
2: Unknown or uninitialised column: 'PROPDMGEXPfactor'.
3: Unknown or uninitialised column: 'PROPDMGEXPfactor'.
4: Unknown or uninitialised column: 'CROPDMGEXPfactor'.
5: Unknown or uninitialised column: 'CROPDMGEXPfactor'.
6: Unknown or uninitialised column: 'CROPDMGEXPfactor'.
# Local path of the compressed storm data and the URL it is fetched from.
file <- "./data/repdata_data_StormData.csv.bz2"
fileUrl1 <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Download only when no local copy exists. The archive is binary, so the
# binary transfer mode is requested explicitly instead of relying on R
# inferring it from the file extension (a text-mode transfer corrupts binary
# files on Windows).
if (!file.exists(file)) {
  download.file(fileUrl1, destfile = file, mode = "wb")
}

# Read file
# read.csv() is given the .bz2 path directly. The read is skipped when an
# object named 'StormData.csv' already exists in the session.
if (!exists("StormData.csv")) {
  StormData.csv <- read.csv(file)
}
1.2 Fatalities summary
For the analysis of the number of fatalities, two columns were extracted from the original data:
EVTYPE |
Has the type of weather event |
FATALITIES |
Has the number of fatalities of each record |
The number of fatalities is totalled for each type of weather event, and the ten event types with the most fatalities are presented.
library(dplyr)

# Ten event types with the highest total number of fatalities.
# Result columns: EVTYPE (factor) and TotalFatalities.
fatalities <-
  StormData.csv %>%
  # Records without fatalities contribute nothing to the totals.
  filter(FATALITIES > 0) %>%
  select(EVTYPE, FATALITIES) %>%
  group_by(EVTYPE) %>%
  summarise(TotalFatalities = sum(FATALITIES)) %>%
  # Explicit ungroup(), as in the injuries and economic-loss summaries, so the
  # top-10 ranking below is always taken across all event types.
  ungroup() %>%
  top_n(10, TotalFatalities) %>%
  mutate(EVTYPE = as.character(EVTYPE)) %>%
  arrange(desc(TotalFatalities)) %>%
  # Factor levels follow the sorted row order, so plots keep the ranking
  # instead of falling back to alphabetical order.
  mutate(EVTYPE = factor(EVTYPE, levels = EVTYPE))
1.3 Injuries summary
For the analysis of the number of injuries, two columns were extracted from the original data:
EVTYPE |
Has the type of weather event |
INJURIES |
Has the number of injuries of each record |
The number of injuries is totalled for each type of weather event, and the ten event types with the most injuries are presented.
# Stage 1: total injuries per event type, using only records that report at
# least one injury.
injuries <- StormData.csv %>%
  filter(INJURIES > 0) %>%
  select(EVTYPE, INJURIES) %>%
  group_by(EVTYPE) %>%
  summarise(TotalInjuries = sum(INJURIES)) %>%
  ungroup()

# Stage 2: keep the ten largest totals, sort them in descending order and turn
# EVTYPE into a factor whose levels follow that order (so plots keep the
# ranking).
injuries <- injuries %>%
  top_n(10, TotalInjuries) %>%
  arrange(desc(TotalInjuries)) %>%
  mutate(
    EVTYPE = factor(as.character(EVTYPE), levels = as.character(EVTYPE))
  )
1.4 Economic damage summary
According to chapter 2.7 of the National Weather Service Storm Data Documentation, there are four columns relevant to the quantification of economic losses:
PROPDMG |
Property damage |
PROPDMGEXP |
Property damage factor: K= thousands, M= millions, B= billions |
CROPDMG |
Crop damage |
CROPDMGEXP |
Crop damage factor: K= thousands, M= millions, B= billions |
For this analysis the economic damage, in USD, is calculated this way:
\[DamageCost ~ = ~ PROPDMG ~ * ~ PROPDMGEXP ~ + ~ CROPDMG ~ * ~ CROPDMGEXP\]
The economic damage, in billions of USD, is totalled for each type of weather event, and the ten most expensive event types are presented.
# Translate damage exponent codes into numeric multipliers.
#   exp_code: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length: K/k = 1e3, M/m = 1e6,
# B/b = 1e9; every other code (including blanks and missing values) maps to 1.
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(as.character(exp_code))])
  # Codes that are not K, M or B are not found in the lookup and come back NA.
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Ten event types with the highest combined property and crop damage.
# Result columns: EVTYPE (factor) and EconomicLosses (USD billions).
# The multipliers are computed inside mutate() rather than through repeated
# `expenses$col[mask] = value` assignments, which avoids the duplicated code
# and the "Unknown or uninitialised column" warnings that form can raise on
# tibbles.
expenses <-
  StormData.csv %>%
  # Only records reporting some damage are relevant.
  filter((PROPDMG + CROPDMG) > 0) %>%
  select(EVTYPE, starts_with("PROP"), starts_with("CROP")) %>%
  mutate(
    PROPDMGEXPfactor = damage_multiplier(PROPDMGEXP),
    CROPDMGEXPfactor = damage_multiplier(CROPDMGEXP),
    losses = PROPDMG * PROPDMGEXPfactor + CROPDMG * CROPDMGEXPfactor
  ) %>%
  group_by(EVTYPE) %>%
  summarise(EconomicLosses = sum(losses)) %>%
  ungroup() %>%
  top_n(10, EconomicLosses) %>%
  mutate(
    EVTYPE = as.character(EVTYPE),
    # Convert USD to USD billions.
    EconomicLosses = EconomicLosses / 1e9
  ) %>%
  arrange(desc(EconomicLosses)) %>%
  # Factor levels follow the sorted row order so plots keep the ranking.
  mutate(EVTYPE = factor(EVTYPE, levels = EVTYPE))
2. Results
2.1 - Weather events that cause the most fatalities
The next plot shows the ten weather events that have caused the most fatalities across the USA.
library(ggplot2)

# Bar chart of total fatalities per event type, one bar per row of
# `fatalities`. X-axis labels are tilted by 30 degrees and the x-axis title
# is blanked.
# Base-graphics alternative, kept for reference:
#   barplot(fatalities$TotalFatalities, legend.text = fatalities$EVTYPE)
ggplot(data = fatalities, mapping = aes(x = EVTYPE, y = TotalFatalities)) +
  geom_col() +
  labs(title = "2.1 - Weather events with more fatalities in USA", x = "") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

2.2 - Weather events that cause the most injuries
The next plot shows the ten weather events that have caused the most injuries across the USA.
# Bar chart of total injuries per event type, one bar per row of `injuries`.
# X-axis labels are tilted by 30 degrees and the x-axis title is blanked.
# Base-graphics alternative, kept for reference:
#   barplot(injuries$TotalInjuries, legend.text = injuries$EVTYPE)
ggplot(data = injuries, mapping = aes(x = EVTYPE, y = TotalInjuries)) +
  geom_col() +
  labs(title = "2.2 - Weather events with more injuries in USA", x = "") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

2.3 - Weather events that cause the greatest economic losses
The next plot shows the ten weather events that have caused the greatest economic losses across the USA.
# Bar chart of economic losses per event type, one bar per row of `expenses`.
# X-axis labels are tilted by 30 degrees and the x-axis title is blanked.
# Base-graphics alternative, kept for reference:
#   barplot(expenses$EconomicLosses, legend.text = expenses$EVTYPE)
ggplot(data = expenses, mapping = aes(x = EVTYPE, y = EconomicLosses)) +
  geom_col() +
  labs(
    title = "2.3 - Weather events more expensive in USA",
    x = "",
    y = "Economic damage (USD Billions)"
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

