In this report, I describe severe weather events in the United States between 1950 and 2011. The data are taken from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database and include information about each weather event, such as when and where it occurred, together with estimates of any fatalities, injuries, and property damage. The goal is to analyze the data and answer the following questions:
1- Across the United States, which types of events are most harmful with respect to population health?
2- Across the United States, which types of events have the greatest economic consequences?
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Download the compressed storm data (only if it is not already on disk)
# and load it into a data frame.
data_url <- 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2'
data_file <- 'data.csv.bz2'
if (!file.exists(data_file)) {
  # mode = "wb" keeps the bzip2 archive byte-exact on every platform.
  download.file(data_url, data_file, mode = "wb")
}
# read.csv() reads the .bz2 file directly; keep text columns as character.
storm_data <- read.csv(data_file, stringsAsFactors = FALSE)
str(storm_data)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Keep only the event type plus the casualty and damage columns, then count
# the missing values across the retained columns.
storm_data_clearn <- tbl_df(storm_data) %>%
  select(EVTYPE, FATALITIES:CROPDMGEXP)
sum(is.na(storm_data_clearn))
## [1] 0
# Translate the damage magnitude codes stored in PROPDMGEXP / CROPDMGEXP into
# numeric multipliers. The codes are matched case-insensitively:
#   H = hundreds, K = thousands, M = millions, B = billions.
# NOTE(review): any other code (blank, digits, "+", "-", "?") is treated as a
# multiplier of 1, i.e. the damage figure is taken at face value -- confirm
# that this is the desired handling of the undocumented codes.
damage_multiplier <- function(exp_code) {
  known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  mult <- unname(known[toupper(exp_code)])
  mult[is.na(mult)] <- 1
  mult
}

# Total the casualties and the damage per event type. The damage figures are
# scaled by their magnitude code before summing, so all_propdmg and
# all_cropdmg are expressed in a single unit (dollars) instead of mixing
# thousands, millions and billions.
# NOTE(review): tables printed from an earlier run of this report used the
# unscaled sums; re-run the report so the printed output matches.
storm_data_clearn <- group_by(storm_data_clearn, EVTYPE)
storm_data_sum <- summarise(
  storm_data_clearn,
  all_fatalities = sum(FATALITIES),
  all_injuries = sum(INJURIES),
  all_propdmg = sum(PROPDMG * damage_multiplier(PROPDMGEXP)),
  all_cropdmg = sum(CROPDMG * damage_multiplier(CROPDMGEXP))
)
# First ten rows of the per-event fatality totals, sorted largest first.
table_fatalities <- storm_data_sum %>%
  select(EVTYPE, all_fatalities) %>%
  arrange(desc(all_fatalities)) %>%
  .[1:10, ]
table_fatalities
## Source: local data frame [10 x 2]
##
## EVTYPE all_fatalities
## (chr) (dbl)
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
# Enlarge the bottom margin so the perpendicular (las = 2) event-type labels
# fit, then draw the fatalities bar chart.
par(mar = c(9, 5, 1, 1))
with(
  table_fatalities,
  barplot(height = all_fatalities, names.arg = EVTYPE, main = "Fatalities", las = 2)
)
# First ten rows of the per-event injury totals, sorted largest first.
injuries <- storm_data_sum %>%
  select(EVTYPE, all_injuries) %>%
  arrange(desc(all_injuries)) %>%
  .[1:10, ]
injuries
## Source: local data frame [10 x 2]
##
## EVTYPE all_injuries
## (chr) (dbl)
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1777
## 9 THUNDERSTORM WIND 1488
## 10 HAIL 1361
# First ten rows of the per-event all_propdmg totals, sorted largest first.
propdmg <- storm_data_sum %>%
  select(EVTYPE, all_propdmg) %>%
  arrange(desc(all_propdmg)) %>%
  .[1:10, ]
propdmg
## Source: local data frame [10 x 2]
##
## EVTYPE all_propdmg
## (chr) (dbl)
## 1 TORNADO 3212258.2
## 2 FLASH FLOOD 1420124.6
## 3 TSTM WIND 1335965.6
## 4 FLOOD 899938.5
## 5 THUNDERSTORM WIND 876844.2
## 6 HAIL 688693.4
## 7 LIGHTNING 603351.8
## 8 THUNDERSTORM WINDS 446293.2
## 9 HIGH WIND 324731.6
## 10 WINTER STORM 132720.6
# Enlarge the bottom margin so the perpendicular (las = 2) event-type labels
# fit, then draw the property damage bar chart.
par(mar = c(9, 5, 1, 1))
with(
  propdmg,
  barplot(height = all_propdmg, names.arg = EVTYPE, main = "Property damage", las = 2)
)
# First ten rows of the per-event all_cropdmg totals, sorted largest first.
cropdmg <- storm_data_sum %>%
  select(EVTYPE, all_cropdmg) %>%
  arrange(desc(all_cropdmg)) %>%
  .[1:10, ]
cropdmg
## Source: local data frame [10 x 2]
##
## EVTYPE all_cropdmg
## (chr) (dbl)
## 1 HAIL 579596.28
## 2 FLASH FLOOD 179200.46
## 3 FLOOD 168037.88
## 4 TSTM WIND 109202.60
## 5 TORNADO 100018.52
## 6 THUNDERSTORM WIND 66791.45
## 7 DROUGHT 33898.62
## 8 THUNDERSTORM WINDS 18684.93
## 9 HIGH WIND 17283.21
## 10 HEAVY RAIN 11122.80
# Enlarge the bottom margin so the perpendicular (las = 2) event-type labels
# fit, then draw the crop damage bar chart.
par(mar = c(9, 5, 1, 1))
with(
  cropdmg,
  barplot(height = all_cropdmg, names.arg = EVTYPE, main = "Crop damage", las = 2)
)