Test performed on a computer with:
- Ubuntu operating system 14.04
- RStudio version 0.98.1103 (© 2009-2014 RStudio)
- R version 3.1.3
Author:
Marco Guado
August – October 2015
Meteorological phenomena not only cause economic problems, but also affect people's health. Prevention is the best way to counter their effects, which is why NOAA keeps track of weather events across the US in order to predict their impact. The following report is a study of storms and the impact they have on people's health and on the economy.
Download the CSV file and read it for processing.
# Fetch the compressed storm data set once and load it into `set.data`.
# Plain ASCII quotes are used throughout: typographic quotes do not parse in R.
data.url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
data.file <- file.path("data", "data_activity.csv.bz2")

# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create("data", showWarnings = FALSE)

# Skip the download when a local copy is already present; mode = "wb" requests
# a binary transfer so the bzip2 archive is written unmodified.
if (!file.exists(data.file)) {
  download.file(data.url, destfile = data.file, mode = "wb")
}

# bzfile() decompresses on the fly while read.csv() parses the contents.
set.data <- read.csv(bzfile(data.file))
We get the name of each column:
# Show the column names of the loaded data frame.
colnames(set.data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM" "year"
For our analysis we use columns 8, 23 and 24 (EVTYPE, FATALITIES and INJURIES)
- Processing of FATALITIES:
# Total fatalities per event type, sorted from the highest total to the lowest.
# Columns are selected by name rather than by position so this step cannot
# silently pick up the wrong data if the column order changes.
new.set.data <- set.data[, c("EVTYPE", "FATALITIES")]
weather.events <- aggregate(
  x = new.set.data$FATALITIES,
  by = list(new.set.data$EVTYPE),
  FUN = sum
)
colnames(weather.events) <- c("events", "sum_events")
weather.events.sort <- weather.events[
  order(weather.events$sum_events, decreasing = TRUE),
]
# Keep the 168 event types with the highest totals.
tmp <- head(weather.events.sort, 168)
events <- tmp$events
sum_events <- tmp$sum_events
Result:
# Combine the sorted event names and their totals into a single data frame.
weather.events.fatalities <- data.frame(
  events = events,
  sum_events = sum_events
)
We have a table with 168 records, but for practical purposes we will only use the first 10:
# First ten rows of the fatalities table; the bare name prints the result.
respta <- head(weather.events.fatalities, n = 10)
respta
## events sum_events
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
Plot of fatalities:
library(ggplot2)
# Bar chart of the top-ten fatalities table. Bars are ordered by their total so
# the ranking is visible, and the axes are flipped so the long event names sit
# on the vertical axis instead of overlapping along the bottom.
ggplot(respta, aes(x = reorder(events, sum_events), y = sum_events)) +
  geom_bar(colour = "black", stat = "identity") +
  coord_flip() +
  labs(x = "Event type", y = "Total fatalities")
# Total injuries per event type, sorted from the highest total to the lowest.
# Columns are selected by name rather than by position so this step cannot
# silently pick up the wrong data if the column order changes.
new.set.data <- set.data[, c("EVTYPE", "INJURIES")]
weather.events <- aggregate(
  x = new.set.data$INJURIES,
  by = list(new.set.data$EVTYPE),
  FUN = sum
)
colnames(weather.events) <- c("events", "sum_events")
weather.events.sort <- weather.events[
  order(weather.events$sum_events, decreasing = TRUE),
]
# Keep the 158 event types with the highest totals.
tmp <- head(weather.events.sort, 158)
events <- tmp$events
sum_events <- tmp$sum_events
Result:
# Combine the sorted event names and their totals into a single data frame.
weather.events.injuries <- data.frame(
  events = events,
  sum_events = sum_events
)
We have a table with 158 records, but for practical purposes we will only use the first 10:
# First ten rows of the injuries table; the bare name prints the result.
rspta <- head(weather.events.injuries, n = 10)
rspta
## events sum_events
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1777
## 9 THUNDERSTORM WIND 1488
## 10 HAIL 1361
Plot of injuries:
library(ggplot2)
# Bar chart of the top-ten injuries table. Bars are ordered by their total so
# the ranking is visible, and the axes are flipped so the long event names sit
# on the vertical axis instead of overlapping along the bottom.
ggplot(rspta, aes(x = reorder(events, sum_events), y = sum_events)) +
  geom_bar(colour = "black", stat = "identity") +
  coord_flip() +
  labs(x = "Event type", y = "Total injuries")
# Largest single PROPDMG value recorded for each event type; top ten printed.
# NOTE(review): PROPDMGEXP is not applied here. If it scales PROPDMG, these
# maxima mix different magnitudes -- confirm before drawing conclusions.
new.data <- set.data[, c("EVTYPE", "PROPDMG")]
# Group by this subset's own EVTYPE column so the step does not depend on
# variables left over from earlier sections.
weather.events.economics <- aggregate(
  x = new.data$PROPDMG,
  by = list(new.data$EVTYPE),
  FUN = max
)
colnames(weather.events.economics) <- c("events", "max_economics")
weather.events.economics.sort <- weather.events.economics[
  order(weather.events.economics$max_economics, decreasing = TRUE),
]
tmp <- head(weather.events.economics.sort, 10)
tmp
## events max_economics
## 147 FLASH FLOOD 5000
## 759 THUNDERSTORM WIND 5000
## 933 WATERSPOUT 5000
## 437 LANDSLIDE 4800
## 830 TORNADO 4410
## 164 FLOOD 3000
## 354 HIGH WIND 3000
## 47 COASTAL FLOOD 1000
## 452 LIGHTNING 1000
## 672 STRONG WIND 1000
Graphics:
# Rebuild a clean two-column data frame from the top-ten table, then plot it.
events <- tmp$events
max_economics <- tmp$max_economics
rspta <- data.frame(events, max_economics)
# Bars are ordered by value so the ranking is visible, and the axes are flipped
# so the long event names sit on the vertical axis instead of overlapping.
ggplot(rspta, aes(x = reorder(events, max_economics), y = max_economics)) +
  geom_bar(colour = "black", stat = "identity") +
  coord_flip() +
  labs(x = "Event type", y = "Maximum PROPDMG")