MOST HARMFUL STORM EVENTS ON UNITED STATES BETWEEN 1990 AND 1999

In United States the weather and storm events affect diffents communities across the year. Many times this events result in injuries, fatalities and property damage. That’s why is importat to prevent the outcome of this kind of events.

The National Oceanic and Atmospheric Administration (NOAA) is the responsible of get and store data of this king of events along the country and during all the year. The resulting database is published as “Storm Data”, which is a official publication of the NOAA.

Some information of the “Storm Data” is provided by outside sources such as the National Weather Service (NWS).

The main idea is to determinate what events are most harmful for poblation and also which events have more economical impact.

Original data set is avaliable on https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2

DATA PROCESSING:

  1. All description rows has been manually removed from the .CSV file
  2. The file was readed by the READ.CSV() function on R
  3. I made new variables with the columns filter (subsetted) by begin year (>= 1990), cause the recent data is more accurate for the analysis
  4. I made a new dataset only with the variables to use already subsetted
  5. The original dataset pass from 368,797 rows to 218,240 rows on the new dataset

The inicial columns was:

setwd("C:/Users/Javier/Downloads")
file1 <- read.csv("data.csv")
names(file1)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

The columns of the new filter and subsetted dataset are:

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
name_country <- as.vector(file1$COUNTYNAME[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
event_type <- as.vector(file1$EVTYPE[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
fatalities <- as.vector(file1$FATALITIES[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
injuries <- as.vector(file1$INJURIES[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
damage_exp <- as.vector(as.character(file1$CROPDMGEXP)[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])
damage <- as.vector(file1$PROPDMG[as.numeric(year(as.Date(file1$BGN_DATE,"%m/%d/%Y"))) >= 1990])

file2 <- as.data.frame(cbind(name_country,event_type,fatalities,injuries,damage_exp,damage))

colnames(file2)
## [1] "name_country" "event_type"   "fatalities"   "injuries"    
## [5] "damage_exp"   "damage"

RESULTS:

Most harmful weather events on United States between 1990 and 1999 per fatalities and injries

### Calculate total of injuries and fatalities for event type

# removing duplicates

A1 <- duplicated(file2$event_type)
A2 <- file2$event_type[!A1]
event <- as.vector(A2) # 907 events

# sum injuries + fatalities by event

event_sum <- as.vector(1:907)

for(i in 1:907)
{
  event_sum[i] <- sum(as.numeric(file2$fatalities[file2$event_type == event[i]])) + sum(as.numeric(file2$injuries[file2$event_type == event[i]]))   
}

# create a new dataset for analysis and plotting

event_name <- as.vector(as.character(event))
event_number <- as.numeric(as.vector(event_sum))

event_results <- cbind.data.frame(event_name,event_number)


# order by bigger, print results (head) and make a plot

res <- head(event_results[order(-event_results$event_number),])
res
##            event_name event_number
## 1                HAIL       146482
## 2           TSTM WIND       144483
## 3             TORNADO        64399
## 10 THUNDERSTORM WINDS        48698
## 20        FLASH FLOOD        30351
## 15          LIGHTNING        28946
# Graphic

barplot(res$event_number,names = as.vector(head(res$event)),col = "blue",xlab = "Event",ylab = "Fatalities + Injuries", main = "Most Harmful Weather Events")

Most harmful weather events on United States between 1990 and 1999 per damages on USD

### Calculate total of injuries and fatalities for event type

exponential <- as.vector(as.character(file2$damage_exp))

exp_values <- 1:218240

for(v in 1:218240)
{
  if(exponential[v] == '?') { exp_values[v] <- 0 }
  else if(exponential[v] == 'B') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000000000 }
  else if(exponential[v] == 'M') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000000 }
  else if(exponential[v] == 'm') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000000 }
  else if(exponential[v] == 'K') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000 }
  else if(exponential[v] == 'k') { exp_values[v] <- as.numeric(file2$damage[v]) * 1000 }
  else if(exponential[v] == 'H') { exp_values[v] <- as.numeric(file2$damage[v]) * 100 }
  else if(exponential[v] == 'h') { exp_values[v] <- as.numeric(file2$damage[v]) * 100 }
  else {  exp_values[v] <- 0 }
}

file3 <- file2
file3$damage <- exp_values

event_damage <- 1:907

for(D in 1:907)
{
  event_damage[D] = sum(file3$damage[file3$event_type == event[D]])
}

damage_name <- as.vector(as.character(event))
damage_number <- as.numeric(as.vector(event_damage))

damage_results <- cbind.data.frame(damage_name,damage_number)

damag <- head(damage_results[order(-damage_results$damage_number),])
damag
##    damage_name damage_number
## 65   ICE STORM  449627042000
## 52 RIVER FLOOD  432781234000
## 1         HAIL   37870457000
## 36       FLOOD   30656729000
## 2    TSTM WIND   17703663000
## 20 FLASH FLOOD   11100241000
barplot(damag$damage_number,names = as.vector(head(damag$damage_name)),col = "blue",xlab = "Event",ylab = "$ Damages", main = "Most Harmful Weather Events")