Test performed on a computer with:
- Ubuntu operating system 16.04
- Version 0.99.903 – © 2009-2016 RStudio
- R version 3.3.3
Prevention is important to counteract the negative effects (deaths, injuries and property damage) of meteorological phenomena. This project analyzes the events recorded from 1959 to November 2011 in order to answer some basic questions about extreme weather phenomena.
It will consist of 5 stages in the following order:
3.1. Data loading
3.2. Data analysis
3.3. Data cleaning and transformation
3.4. Valid data processing
3.5. Analysis of results
# Download the compressed storm data set once and cache it under data/.
data.file <- "data/data_activity.csv.bz2"
if (!file.exists(data.file)) {
  # Create the cache directory only when it is missing; dir.create() warns
  # if the directory already exists.
  if (!dir.exists("data")) {
    dir.create("data")
  }
  # mode = "wb" requests a binary transfer so the bzip2 archive is stored
  # byte-for-byte regardless of platform.
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = data.file,
    mode = "wb"
  )
}
# Read the CSV through a bzfile() connection to the cached archive.
set.data <- read.csv(bzfile(data.file))
We get the name of each column:
# List the column names of the loaded data set.
colnames(set.data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Preview the first six rows of four of the columns.
head(
  set.data[, c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG")],
  n = 6L
)
## EVTYPE FATALITIES INJURIES PROPDMG
## 1 TORNADO 0 15 25.0
## 2 TORNADO 0 0 2.5
## 3 TORNADO 0 2 25.0
## 4 TORNADO 0 2 2.5
## 5 TORNADO 0 2 2.5
## 6 TORNADO 0 6 2.5
For our analysis we use columns 8, 23 and 24 (EVTYPE, FATALITIES and INJURIES). Fatalities as a function of event type:
# Total fatalities per event type, ordered from the highest total downwards.
new.set.data <- set.data[, c("EVTYPE", "FATALITIES")]
weather.events <- aggregate(
  new.set.data$FATALITIES,
  by = list(new.set.data$EVTYPE),
  FUN = sum
)
names(weather.events) <- c("events", "sum_events")
weather.events.sort <- weather.events[
  order(weather.events$sum_events, decreasing = TRUE),
]
# Keep the 168 highest-ranked event types and expose their two columns.
tmp <- head(weather.events.sort, n = 168L)
events <- tmp[["events"]]
sum_events <- tmp[["sum_events"]]
Result:
# Combine the ranked event types and their fatality totals into one table.
weather.events.fatalities <- data.frame(
  events = events,
  sum_events = sum_events
)
We have a table with 168 records, but for practical purposes we will only use the first 10:
# Top ten event types by total fatalities.
respta <- head(weather.events.fatalities, n = 10L)
Image Fatalities:
library(ggplot2)
# Theme for the chart: x labels rotated 90 degrees, grey axis text.
grey_theme <- theme(
  axis.text.x = element_text(
    colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12),
  text = element_text(size = 16)
)
# Bar chart of total fatalities for the event types held in respta.
ggplot(data = respta, mapping = aes(x = events, y = sum_events)) +
  geom_bar(stat = "identity", colour = "blue") +
  grey_theme
Injuries as a function of event type:
# Total injuries per event type, ordered from the highest total downwards.
new.set.data <- set.data[, c("EVTYPE", "INJURIES")]
weather.events <- aggregate(
  new.set.data$INJURIES,
  by = list(new.set.data$EVTYPE),
  FUN = sum
)
names(weather.events) <- c("events", "sum_events")
weather.events.sort <- weather.events[
  order(weather.events$sum_events, decreasing = TRUE),
]
# Keep the 158 highest-ranked event types and expose their two columns.
tmp <- head(weather.events.sort, n = 158L)
events <- tmp[["events"]]
sum_events <- tmp[["sum_events"]]
Result:
# Combine the ranked event types and their injury totals into one table.
weather.events.injuries <- data.frame(
  events = events,
  sum_events = sum_events
)
We have a table with 158 records, but for practical purposes we will only use the first 10:
# Top ten event types by total injuries.
rspta <- head(weather.events.injuries, n = 10L)
Image Injuries:
library(ggplot2)
# Theme for the chart: x labels rotated 90 degrees, grey axis text.
grey_theme <- theme(
  axis.text.x = element_text(
    colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12),
  text = element_text(size = 16)
)
# Bar chart of total injuries for the event types held in rspta.
ggplot(data = rspta, mapping = aes(x = events, y = sum_events)) +
  geom_bar(stat = "identity", colour = "blue") +
  grey_theme
Economic losses
Economic losses as a function of weather events.
# Largest single PROPDMG value recorded for each event type.
# NOTE(review): PROPDMG is used as-is and the PROPDMGEXP column is not
# applied; confirm whether the values need scaling before being compared.
new.data <- set.data[, c("EVTYPE", "PROPDMG")]
# Group by the EVTYPE column of new.data itself, not by a variable left over
# from an earlier section, so this step does not depend on what ran before it.
weather.events.economics <- aggregate(
  new.data$PROPDMG,
  by = list(new.data$EVTYPE),
  FUN = max
)
colnames(weather.events.economics) <- c("events", "max_economics")
weather.events.economics.sort <- weather.events.economics[
  order(weather.events.economics$max_economics, decreasing = TRUE),
]
# Keep and print the ten event types with the highest maximum.
tmp <- head(weather.events.economics.sort, n = 10L)
tmp
## events max_economics
## 147 FLASH FLOOD 5000
## 759 THUNDERSTORM WIND 5000
## 933 WATERSPOUT 5000
## 437 LANDSLIDE 4800
## 830 TORNADO 4410
## 164 FLOOD 3000
## 354 HIGH WIND 3000
## 47 COASTAL FLOOD 1000
## 452 LIGHTNING 1000
## 672 STRONG WIND 1000
Graphics:
# Rebuild a two-column table from tmp and chart it.
events <- tmp[["events"]]
max_economics <- tmp[["max_economics"]]
rspta <- data.frame(events = events, max_economics = max_economics)
# Theme for the chart: x labels rotated 90 degrees, grey axis text.
grey_theme <- theme(
  axis.text.x = element_text(
    colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12),
  text = element_text(size = 16)
)
# Bar chart of the maximum PROPDMG value per event type.
ggplot(data = rspta, mapping = aes(x = events, y = max_economics)) +
  geom_bar(stat = "identity", colour = "blue") +
  grey_theme
Looking at the first two graphs, we can see that tornadoes are the events that cause the most fatalities and injuries, whereas the events that most negatively affect a community's economy are:
- FLASH FLOOD
- THUNDERSTORM WIND
- WATERSPOUT