In United States the weather and storm events affect diffents communities across the year. Many times this events result in injuries, fatalities and property damage. That’s why is importat to prevent the outcome of this kind of events.
The National Oceanic and Atmospheric Administration (NOAA) is the responsible of get and store data of this king of events along the country and during all the year. The resulting database is published as “Storm Data”, which is a official publication of the NOAA.
Some information of the “Storm Data” is provided by outside sources such as the National Weather Service (NWS).
The main idea is to determinate what events are most harmful for poblation and also which events have more economical impact.
Original data set is avaliable on https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2The inicial columns was:
setwd("C:/Users/Javier/Downloads")
file1 <- read.csv("data.csv")
names(file1)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
The columns of the new filter and subsetted dataset are:
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
name_country <- as.vector(file1$COUNTYNAME[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
event_type <- as.vector(file1$EVTYPE[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
fatalities <- as.vector(file1$FATALITIES[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
injuries <- as.vector(file1$INJURIES[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
damage_exp <- as.vector(as.character(file1$CROPDMGEXP)[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
damage <- as.vector(file1$PROPDMG[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
file2 <- as.data.frame(cbind(name_country,event_type,fatalities,injuries,damage_exp,damage))
colnames(file2)
## [1] "name_country" "event_type" "fatalities" "injuries"
## [5] "damage_exp" "damage"
Most harmful weather events on United States between 1990 and 1999 per fatalities and injries
### Calculate total of injuries and fatalities for event type
# removing duplicates
A1 <- duplicated(file2$event_type)
A2 <- file2$event_type[!A1]
event <- as.vector(A2) # 907 events
# sum injuries + fatalities by event
event_sum <- as.vector(1:907)
for(i in 1:907)
{
event_sum[i] <- sum(as.numeric(file2$fatalities[file2$event_type == event[i]])) + sum(as.numeric(file2$injuries[file2$event_type == event[i]]))
}
# create a new dataset for analysis and plotting
event_name <- as.vector(as.character(event))
event_number <- as.numeric(as.vector(event_sum))
event_results <- cbind.data.frame(event_name,event_number)
# order by bigger, print results (head) and make a plot
res <- head(event_results[order(-event_results$event_number),])
res
## event_name event_number
## 1 HAIL 146482
## 2 TSTM WIND 144483
## 3 TORNADO 64399
## 10 THUNDERSTORM WINDS 48698
## 20 FLASH FLOOD 30351
## 15 LIGHTNING 28946
# Graphic
barplot(res$event_number,names = as.vector(head(res$event)),col = "blue",xlab = "Event",ylab = "Fatalities + Injuries", main = "Most Harmful Weather Events")
Most harmful weather events on United States between 1990 and 1999 per damages on USD
### Calculate total of injuries and fatalities for event type
exponential <- as.vector(as.character(file2$damage_exp))
exp_values <- 1:218240
for(v in 1:218240)
{
if(exponential[v] == '?') { exp_values[v] <- 0 }
else if(exponential[v] == 'B') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000000000 }
else if(exponential[v] == 'M') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000000 }
else if(exponential[v] == 'm') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000000 }
else if(exponential[v] == 'K') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000 }
else if(exponential[v] == 'k') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000 }
else if(exponential[v] == 'H') { exp_values[v] <- as.numeric(file2$damage[v]) * 100 }
else if(exponential[v] == 'h') { exp_values[v] <- as.numeric(file2$damage[v]) * 100 }
else { exp_values[v] <- 0 }
}
file3 <- file2
file3$damage <- exp_values
event_damage <- 1:907
for(D in 1:907)
{
event_damage[D] = sum(file3$damage[file3$event_type == event[D]])
}
damage_name <- as.vector(as.character(event))
damage_number <- as.numeric(as.vector(event_damage))
damage_results <- cbind.data.frame(damage_name,damage_number)
damag <- head(damage_results[order(-damage_results$damage_number),])
damag
## damage_name damage_number
## 65 ICE STORM 449627042000
## 52 RIVER FLOOD 432781234000
## 1 HAIL 37870457000
## 36 FLOOD 30656729000
## 2 TSTM WIND 17703663000
## 20 FLASH FLOOD 11100241000
barplot(damag$damage_number,names = as.vector(head(damag$damage_name)),col = "blue",xlab = "Event",ylab = "$ Damages", main = "Most Harmful Weather Events")