Synopsis

In this report, I aim to describe several weather events in the United States between the years 1950 and 2011. The data are taken from the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database and include information about weather events, such as when and where they occurred, as well as estimates of any fatalities, injuries, and property damage. The goal is to analyze the data and answer the following questions:

1- Across the United States, which types of events are most harmful with respect to population health?

2- Across the United States, which types of events have the greatest economic consequences?

Loading data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Download the storm data archive, skipping the transfer when a copy is
# already present so re-running the report does not re-fetch the file.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "data.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" requests a binary transfer explicitly instead of relying on
  # download.file() inferring it from the file extension.
  download.file(storm_url, storm_file, mode = "wb")
}

# read.csv() is handed the .bz2 archive directly, as in the original call.
# Strings are kept as character vectors rather than converted to factors.
storm_data <- read.csv(storm_file, stringsAsFactors = FALSE)

# Show the column names and types of the loaded data frame.
str(storm_data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Keep only the columns used in the analysis: the event type plus the
# casualty and damage columns (FATALITIES through CROPDMGEXP).
# as_tibble() is used in place of tbl_df(), which dplyr has deprecated.
storm_data_clearn <- select(as_tibble(storm_data), EVTYPE, FATALITIES:CROPDMGEXP)
# Count the missing values across all selected columns.
sum(is.na(storm_data_clearn))
## [1] 0

Grouping data by event type

# Translate a damage-magnitude code (PROPDMGEXP / CROPDMGEXP) into the
# numeric factor the matching damage figure must be multiplied by.
#   exp_code: character vector of magnitude codes.
#   returns:  numeric vector with one factor per element of exp_code.
# K/M/B (case-insensitive) are read as thousands/millions/billions and H as
# hundreds.
# NOTE(review): every other code (blank, digits, "+", "-", "?") is treated
# as a factor of 1 -- confirm this choice against the NOAA documentation.
damage_multiplier <- function(exp_code) {
  known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # Unknown or empty codes do not match any name and come back as NA.
  factors <- unname(known[toupper(exp_code)])
  factors[is.na(factors)] <- 1
  factors
}

# Total casualties and damage per event type.
# The damage figures are scaled by their magnitude codes before summing;
# adding the raw PROPDMG / CROPDMG values would mix thousands, millions and
# billions in one total. all_propdmg and all_cropdmg are therefore in
# dollars. NOTE: re-knit the report so the printed damage tables reflect this.
# The grouping is applied inside the pipeline, so storm_data_clearn itself
# is left ungrouped.
storm_data_sum <- storm_data_clearn %>%
  mutate(
    prop_dollars = PROPDMG * damage_multiplier(PROPDMGEXP),
    crop_dollars = CROPDMG * damage_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarise(
    all_fatalities = sum(FATALITIES),
    all_injuries = sum(INJURIES),
    all_propdmg = sum(prop_dollars),
    all_cropdmg = sum(crop_dollars)
  )

# Ten event types with the most fatalities, highest first. head() is used
# so that fewer than ten event types would not produce padding NA rows.
table_fatalities <- storm_data_sum %>%
  select(EVTYPE, all_fatalities) %>%
  arrange(desc(all_fatalities)) %>%
  head(10)
table_fatalities
## Source: local data frame [10 x 2]
## 
##            EVTYPE all_fatalities
##             (chr)          (dbl)
## 1         TORNADO           5633
## 2  EXCESSIVE HEAT           1903
## 3     FLASH FLOOD            978
## 4            HEAT            937
## 5       LIGHTNING            816
## 6       TSTM WIND            504
## 7           FLOOD            470
## 8     RIP CURRENT            368
## 9       HIGH WIND            248
## 10      AVALANCHE            224

Plot Fatalities

# Use a 9-line bottom margin; with las = 2 the event-type labels are drawn
# perpendicular to the axis.
par(mar = c(9, 5, 1, 1))
# One bar per event type in table_fatalities, in the table's row order.
with(
  table_fatalities,
  barplot(height = all_fatalities, names.arg = EVTYPE, main = "Fatalities", las = 2)
)

 # First ten rows of the per-event injury totals, sorted highest first.
 injuries <- storm_data_sum %>%
   select(EVTYPE, all_injuries) %>%
   arrange(desc(all_injuries)) %>%
   .[1:10, ]
 injuries
## Source: local data frame [10 x 2]
## 
##               EVTYPE all_injuries
##                (chr)        (dbl)
## 1            TORNADO        91346
## 2          TSTM WIND         6957
## 3              FLOOD         6789
## 4     EXCESSIVE HEAT         6525
## 5          LIGHTNING         5230
## 6               HEAT         2100
## 7          ICE STORM         1975
## 8        FLASH FLOOD         1777
## 9  THUNDERSTORM WIND         1488
## 10              HAIL         1361
# First ten rows of the per-event property damage totals, sorted highest first.
propdmg <- storm_data_sum %>%
  select(EVTYPE, all_propdmg) %>%
  arrange(desc(all_propdmg)) %>%
  .[1:10, ]
propdmg
## Source: local data frame [10 x 2]
## 
##                EVTYPE all_propdmg
##                 (chr)       (dbl)
## 1             TORNADO   3212258.2
## 2         FLASH FLOOD   1420124.6
## 3           TSTM WIND   1335965.6
## 4               FLOOD    899938.5
## 5   THUNDERSTORM WIND    876844.2
## 6                HAIL    688693.4
## 7           LIGHTNING    603351.8
## 8  THUNDERSTORM WINDS    446293.2
## 9           HIGH WIND    324731.6
## 10       WINTER STORM    132720.6

Plot property damage

# Use a 9-line bottom margin; with las = 2 the event-type labels are drawn
# perpendicular to the axis.
par(mar = c(9, 5, 1, 1))
# One bar per event type in propdmg, in the table's row order.
with(
  propdmg,
  barplot(height = all_propdmg, names.arg = EVTYPE, main = "Property damage", las = 2)
)

# First ten rows of the per-event crop damage totals, sorted highest first.
cropdmg <- storm_data_sum %>%
  select(EVTYPE, all_cropdmg) %>%
  arrange(desc(all_cropdmg)) %>%
  .[1:10, ]
cropdmg
## Source: local data frame [10 x 2]
## 
##                EVTYPE all_cropdmg
##                 (chr)       (dbl)
## 1                HAIL   579596.28
## 2         FLASH FLOOD   179200.46
## 3               FLOOD   168037.88
## 4           TSTM WIND   109202.60
## 5             TORNADO   100018.52
## 6   THUNDERSTORM WIND    66791.45
## 7             DROUGHT    33898.62
## 8  THUNDERSTORM WINDS    18684.93
## 9           HIGH WIND    17283.21
## 10         HEAVY RAIN    11122.80

Plot crop damage

# Use a 9-line bottom margin; with las = 2 the event-type labels are drawn
# perpendicular to the axis.
par(mar = c(9, 5, 1, 1))
# One bar per event type in cropdmg, in the table's row order.
with(
  cropdmg,
  barplot(height = all_cropdmg, names.arg = EVTYPE, main = "Crop damage", las = 2)
)

Conclusion

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
    – TORNADO and HEAT.
  2. Across the United States, which types of events have the greatest economic consequences?
    – FLOOD and DROUGHT.