Test performed on a computer with:
- Ubuntu operating system 16.04
- Version 0.99.903 – © 2009-2016 RStudio
- R version 3.3.3

1. Title - Analysis of the impact of meteorological phenomena on public health and on the economy of municipalities and communities.

2. Objective

Prevention is important to counteract the negative effects (deaths, injuries and property damage) of meteorological phenomena. The present project aims to analyze the events recorded in the period from 1959 to November 2011 in order to answer some basic questions about extreme weather phenomena.

3. Process

It will consist of 5 stages in the following order:

3.1. Data loading
3.2. Data analysis
3.3. Data cleaning and transformation
3.4. Valid data processing
3.5. Analysis of results

3.1 Data loading

# Download the compressed storm data set once and cache it under data/.
data.dir <- "data"
data.file <- file.path(data.dir, "data_activity.csv.bz2")
data.url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

if (!file.exists(data.file)) {
  # Only create the directory when it is missing; dir.create() warns when
  # the directory already exists.
  if (!dir.exists(data.dir)) {
    dir.create(data.dir)
  }
  # Request a binary transfer explicitly, since the file is a bzip2 archive.
  download.file(data.url, destfile = data.file, mode = "wb")
}

# read.csv() reads through the bzfile() connection, so the archive does not
# have to be unpacked on disk first.
set.data <- read.csv(bzfile(data.file))

3.2. Data analysis

We get the name of each column:

# List every column available in the loaded data frame.
colnames(set.data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Preview the first rows of a few columns of interest.
preview.cols <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG")
head(set.data[preview.cols])
##    EVTYPE FATALITIES INJURIES PROPDMG
## 1 TORNADO          0       15    25.0
## 2 TORNADO          0        0     2.5
## 3 TORNADO          0        2    25.0
## 4 TORNADO          0        2     2.5
## 5 TORNADO          0        2     2.5
## 6 TORNADO          0        6     2.5

3.3. Data cleaning and transformation

For our analysis we use columns 8, 23 and 24 (EVTYPE, FATALITIES and INJURIES). First, the events are processed as a function of FATALITIES:

# Total fatalities per event type, ranked from the highest total downwards.
new.set.data <- set.data[c("EVTYPE", "FATALITIES")]
weather.events <- setNames(
  aggregate(new.set.data[["FATALITIES"]],
            by = list(new.set.data[["EVTYPE"]]),
            FUN = sum),
  c("events", "sum_events")
)
ranking <- order(weather.events[["sum_events"]], decreasing = TRUE)
weather.events.sort <- weather.events[ranking, ]
# Keep the 168 highest-ranked event types.
tmp <- head(weather.events.sort, n = 168)
events <- tmp[["events"]]
sum_events <- tmp[["sum_events"]]

Result:

# Two-column table: event type and its summed fatalities.
weather.events.fatalities <- data.frame(events = events,
                                        sum_events = sum_events)

3.4. Valid data processing

We have a table with 168 records, but for practical purposes we will only use the first 10:

# Restrict the fatalities table to its first ten rows for plotting.
respta <- head(weather.events.fatalities, n = 10)

Image Fatalities:

library(ggplot2)

# Bar-chart look: x labels rotated 90 degrees, grey tick text, larger font.
axis.label.x <- element_text(colour = "grey20", size = 12, angle = 90,
                             hjust = .5, vjust = .5)
axis.label.y <- element_text(colour = "grey20", size = 12)
grey_theme <- theme(axis.text.x = axis.label.x,
                    axis.text.y = axis.label.y,
                    text = element_text(size = 16))

# One bar per event type; bar height is the value in sum_events.
ggplot(respta, aes(x = events, y = sum_events)) +
  geom_bar(colour = "blue", stat = "identity") +
  grey_theme

Processed as a function of INJURIES:

# Total injuries per event type, ranked from the highest total downwards.
new.set.data <- set.data[c("EVTYPE", "INJURIES")]
weather.events <- setNames(
  aggregate(new.set.data[["INJURIES"]],
            by = list(new.set.data[["EVTYPE"]]),
            FUN = sum),
  c("events", "sum_events")
)
ranking <- order(weather.events[["sum_events"]], decreasing = TRUE)
weather.events.sort <- weather.events[ranking, ]
# Keep the 158 highest-ranked event types.
tmp <- head(weather.events.sort, n = 158)
events <- tmp[["events"]]
sum_events <- tmp[["sum_events"]]

Result:

# Two-column table: event type and its summed injuries.
weather.events.injuries <- data.frame(events = events,
                                      sum_events = sum_events)

We have a table with 158 records, but for practical purposes we will only use the first 10:

# Restrict the injuries table to its first ten rows for plotting.
rspta <- head(weather.events.injuries, n = 10)

Image Injuries:

library(ggplot2)

# Bar-chart look: x labels rotated 90 degrees, grey tick text, larger font.
axis.label.x <- element_text(colour = "grey20", size = 12, angle = 90,
                             hjust = .5, vjust = .5)
axis.label.y <- element_text(colour = "grey20", size = 12)
grey_theme <- theme(axis.text.x = axis.label.x,
                    axis.text.y = axis.label.y,
                    text = element_text(size = 16))

# One bar per event type; bar height is the value in sum_events.
ggplot(rspta, aes(x = events, y = sum_events)) +
  geom_bar(colour = "blue", stat = "identity") +
  grey_theme

Economic losses
Economic losses as a function of weather events.

# Largest single PROPDMG value recorded for each event type, top ten shown.
# NOTE(review): PROPDMG is used as-is; the data set also has a PROPDMGEXP
# column that presumably scales it -- confirm the raw values are comparable
# across rows before drawing conclusions from this ranking.
new.data <- set.data[, c(8, 25)]
# Group by the EVTYPE column of this same subset (not a leftover variable
# from an earlier section) so values and groups always come from the same
# rows.
weather.events.economics <- aggregate(new.data$PROPDMG,
                                      by = list(new.data$EVTYPE),
                                      FUN = max)
colnames(weather.events.economics) <- c("events", "max_economics")
weather.events.economics.sort <- weather.events.economics[
  order(weather.events.economics$max_economics, decreasing = TRUE),
]
tmp <- head(weather.events.economics.sort, 10)
tmp
##                events max_economics
## 147       FLASH FLOOD          5000
## 759 THUNDERSTORM WIND          5000
## 933        WATERSPOUT          5000
## 437         LANDSLIDE          4800
## 830           TORNADO          4410
## 164             FLOOD          3000
## 354         HIGH WIND          3000
## 47      COASTAL FLOOD          1000
## 452         LIGHTNING          1000
## 672       STRONG WIND          1000

Graphics:

# Rebuild a plain two-column frame from the top-ten table.
events <- tmp[["events"]]
max_economics <- tmp[["max_economics"]]
rspta <- data.frame(events = events, max_economics = max_economics)

# Bar-chart look: x labels rotated 90 degrees, grey tick text, larger font.
axis.label.x <- element_text(colour = "grey20", size = 12, angle = 90,
                             hjust = .5, vjust = .5)
axis.label.y <- element_text(colour = "grey20", size = 12)
grey_theme <- theme(axis.text.x = axis.label.x,
                    axis.text.y = axis.label.y,
                    text = element_text(size = 16))

# One bar per event type; bar height is the value in max_economics.
ggplot(rspta, aes(x = events, y = max_economics)) +
  geom_bar(colour = "blue", stat = "identity") +
  grey_theme

3.5. Analysis of results

Observing the first two graphs, we can determine that tornadoes are the events that cause the most fatalities and injuries. In economic terms, however, the events that most negatively affect a community are:
. FLASH FLOOD
. THUNDERSTORM WIND
. WATERSPOUT