Reproducible Research: Peer Assessment 2

Test performed on a computer with:
- Ubuntu 14.04 operating system
- Version 0.98.1103 – © 2009-2014 RStudio
- R version 3.1.3

An Analysis Report of Health and Economic Impact by Severe Weather Events - Based on NOAA Storm Database

Meteorological phenomena not only cause economic problems, but also affect people's health. Prevention is the best way to counter their effects, which is why NOAA keeps track of weather events across the US in order to predict their impact. The following report is a study of storms and the impact they have on the health and economy of the population.

Data Processing

Download the CSV file and read it for processing.

# Fetch the compressed NOAA storm data set and load it into `set.data`.
# String literals use plain ASCII quotes; typographic quotes are not valid
# string delimiters in R and make the chunk fail to parse.
dir.create("data", showWarnings = FALSE)  # no warning when re-running
if (!file.exists("data/data_activity.csv.bz2")) {
  # Skip the download when a local copy already exists; binary mode keeps the
  # bzip2 archive byte-exact on every platform.
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "data/data_activity.csv.bz2",
    mode = "wb"
  )
}
set.data <- read.csv(bzfile("data/data_activity.csv.bz2"))

We get the name of each column:

# List the column names of the loaded data frame.
colnames(set.data)

##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"     "year"

For our analysis we use columns 8, 23 and 24 (EVTYPE, FATALITIES and INJURIES).
- Processing as a function of FATALITIES:

# Sum the fatalities recorded for each event type, rank the event types from
# most to fewest deaths, and keep the first 168 rows of that ranking.
new.set.data <- set.data[, c("EVTYPE", "FATALITIES")]
weather.events <- aggregate(
  new.set.data$FATALITIES,
  by = list(new.set.data$EVTYPE),
  FUN = sum
)
names(weather.events) <- c("events", "sum_events")
weather.events.sort <- weather.events[
  order(weather.events$sum_events, decreasing = TRUE),
]
tmp <- head(weather.events.sort, n = 168)
# Plain vectors, used afterwards to build the result table.
events <- tmp$events
sum_events <- tmp$sum_events

Result:

# Rebuild the ranking as a fresh data frame (row names restart at 1).
weather.events.fatalities <- data.frame(events = events, sum_events = sum_events)

We have a table with 168 records, but for practical purposes we will only use the first 10:

# Ten event types with the highest fatality totals; printed for the report.
respta <- head(weather.events.fatalities, n = 10)
respta

##            events sum_events
## 1         TORNADO       5633
## 2  EXCESSIVE HEAT       1903
## 3     FLASH FLOOD        978
## 4            HEAT        937
## 5       LIGHTNING        816
## 6       TSTM WIND        504
## 7           FLOOD        470
## 8     RIP CURRENT        368
## 9       HIGH WIND        248
## 10      AVALANCHE        224

Image Fatalities:

# Bar chart of the top-ten table: one bar per event type, bar height equal to
# the summed fatalities.
library(ggplot2)
ggplot(data = respta, mapping = aes(x = events, y = sum_events)) +
  geom_bar(stat = "identity", colour = "black")

Processing as a function of INJURIES:

# Sum the injuries recorded for each event type, rank the event types from
# most to fewest injuries, and keep the first 158 rows of that ranking.
new.set.data <- set.data[, c("EVTYPE", "INJURIES")]
weather.events <- aggregate(
  new.set.data$INJURIES,
  by = list(new.set.data$EVTYPE),
  FUN = sum
)
names(weather.events) <- c("events", "sum_events")
weather.events.sort <- weather.events[
  order(weather.events$sum_events, decreasing = TRUE),
]
tmp <- head(weather.events.sort, n = 158)
# Plain vectors, used afterwards to build the result table.
events <- tmp$events
sum_events <- tmp$sum_events

Result:

# Rebuild the ranking as a fresh data frame (row names restart at 1).
weather.events.injuries <- data.frame(events = events, sum_events = sum_events)

We have a table with 158 records, but for practical purposes we will only use the first 10:

# Ten event types with the highest injury totals; printed for the report.
rspta <- head(weather.events.injuries, n = 10)
rspta

##               events sum_events
## 1            TORNADO      91346
## 2          TSTM WIND       6957
## 3              FLOOD       6789
## 4     EXCESSIVE HEAT       6525
## 5          LIGHTNING       5230
## 6               HEAT       2100
## 7          ICE STORM       1975
## 8        FLASH FLOOD       1777
## 9  THUNDERSTORM WIND       1488
## 10              HAIL       1361

Image Injuries:

# Bar chart of the top-ten table: one bar per event type, bar height equal to
# the summed injuries.
library(ggplot2)
ggplot(data = rspta, mapping = aes(x = events, y = sum_events)) +
  geom_bar(stat = "identity", colour = "black")

Economic losses

Economic losses as a function of weather events.

# Largest single PROPDMG value recorded for each event type, sorted in
# decreasing order; the ten highest rows are kept in `tmp` and printed.
# NOTE(review): PROPDMG is used as-is, without the PROPDMGEXP column, and the
# statistic is a per-event-type maximum rather than a total -- confirm this is
# the intended measure of economic loss.
new.data <- set.data[, c(8, 25)]
# Group by the EVTYPE column of the same subset that supplies PROPDMG, so this
# step does not rely on `new.set.data` left over from an earlier section.
weather.events.economics <- aggregate(
  new.data$PROPDMG,
  by = list(new.data$EVTYPE),
  FUN = max
)
colnames(weather.events.economics) <- c("events", "max_economics")
weather.events.economics.sort <- weather.events.economics[
  order(weather.events.economics$max_economics, decreasing = TRUE),
]
tmp <- head(weather.events.economics.sort, 10)
tmp

##                events max_economics
## 147       FLASH FLOOD          5000
## 759 THUNDERSTORM WIND          5000
## 933        WATERSPOUT          5000
## 437         LANDSLIDE          4800
## 830           TORNADO          4410
## 164             FLOOD          3000
## 354         HIGH WIND          3000
## 47      COASTAL FLOOD          1000
## 452         LIGHTNING          1000
## 672       STRONG WIND          1000

Graphics:

# Copy the top-ten rows into a fresh data frame (row names restart at 1) and
# draw one bar per event type, bar height equal to the maximum PROPDMG value.
events <- tmp$events
max_economics <- tmp$max_economics
rspta <- data.frame(events = events, max_economics = max_economics)
ggplot(data = rspta, mapping = aes(x = events, y = max_economics)) +
  geom_bar(stat = "identity", colour = "black")