This project aims to explore the NOAA Storm Database and to answer some basic questions about severe weather events. These events occurred across the United States and were documented according to the National Weather Service Instruction issued on August 17, 2007. The priority is to study the events that have the greatest economic consequences and those that are most harmful with respect to population health, in order to show the trends of the storms. RStudio was used to present the results in Rmd format.
# Read the bzip2-compressed storm CSV, then derive a four-digit YEAR string
# for every record from the month/day/year prefix of BGN_DATE.
data <- read.csv(file = bzfile("repdata-data-StormData.csv.bz2"))
dateformat <- strptime(x = data[["BGN_DATE"]], format = "%m/%d/%Y")
data[["YEAR"]] <- format(dateformat, "%Y")
## Load the package
library(sqldf)
# Tabulate the raw property-damage magnitude codes present in the data.
summary(data$PROPDMGEXP)
## - ? + 0 1 2 3 4 5
## 465934 1 8 5 216 25 13 4 4 28
## 6 7 8 B h H K m M
## 4 5 1 40 1 6 424665 7 11330
# Convert PROPDMG to dollars by applying the PROPDMGEXP magnitude code.
# Letter codes are matched case-insensitively via UPPER():
#   H = hundreds, K = thousands, M = millions, B = billions.
# Digit codes 1-8 are treated as powers of ten; every other code (blank,
# "-", "?", "+", "0") leaves PROPDMG unscaled.
# Fix: "m" used to be scaled by 0.001 and "B" was not scaled at all, which
# understated the largest losses by factors of 1e9. Totals recorded further
# down in this file were rendered before this correction and need to be
# regenerated.
# SQL string literals use single quotes; double quotes denote identifiers.
PROPDMGNEW <- sqldf("select PROPDMG*(CASE UPPER(PROPDMGEXP)
WHEN 'B' THEN 1000000000
WHEN 'M' THEN 1000000
WHEN 'K' THEN 1000
WHEN 'H' THEN 100
WHEN '1' THEN 10
WHEN '2' THEN 100
WHEN '3' THEN 1000
WHEN '4' THEN 10000
WHEN '5' THEN 100000
WHEN '6' THEN 1000000
WHEN '7' THEN 10000000
WHEN '8' THEN 100000000
ELSE 1 END) FROM data")
# Tabulate the raw crop-damage magnitude codes present in the data.
summary(data$CROPDMGEXP)
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
# Convert CROPDMG to dollars by applying the CROPDMGEXP magnitude code.
# Letter codes are matched case-insensitively via UPPER():
#   K = thousands, M = millions, B = billions.
# The digit code "2" is treated as a power of ten (100); every other code
# (blank, "?", "0") leaves CROPDMG unscaled.
# Fix: "k" used to be scaled by 100, "m" by 0.001, and "B" was not scaled
# at all. Crop totals recorded further down in this file were rendered
# before this correction and need to be regenerated.
# SQL string literals use single quotes; double quotes denote identifiers.
CROPDMGNEW <- sqldf("select CROPDMG*(CASE UPPER(CROPDMGEXP)
WHEN 'B' THEN 1000000000
WHEN 'M' THEN 1000000
WHEN 'K' THEN 1000
WHEN '2' THEN 100
ELSE 1 END) FROM data")
# Give each single-column sqldf result a fixed column name, then append both
# scaled damage columns to the raw storm records.
names(CROPDMGNEW) <- "CROPDMGNEW"
names(PROPDMGNEW) <- "PROPDMGNEW"
datanew <- data.frame(data, PROPDMGNEW, CROPDMGNEW)
# Each summary below sums one measure per (year, event type) pair and
# labels the result columns YEAR, EVENT and TOTAL.

# Crop damage per year and event type
datasummarycrop <- setNames(
  with(datanew, aggregate(CROPDMGNEW,
                          by = list(YEAR = as.factor(YEAR), EVENT = EVTYPE),
                          FUN = sum, na.rm = TRUE)),
  c("YEAR", "EVENT", "TOTAL")
)

# Property damage per year and event type
datasummaryprop <- setNames(
  with(datanew, aggregate(PROPDMGNEW,
                          by = list(YEAR = as.factor(YEAR), EVENT = EVTYPE),
                          FUN = sum, na.rm = TRUE)),
  c("YEAR", "EVENT", "TOTAL")
)

# Fatalities per year and event type
datasummaryfatalit <- setNames(
  with(datanew, aggregate(FATALITIES,
                          by = list(YEAR = as.factor(YEAR), EVENT = EVTYPE),
                          FUN = sum, na.rm = TRUE)),
  c("YEAR", "EVENT", "TOTAL")
)

# Injuries per year and event type
datasummaryinjuries <- setNames(
  with(datanew, aggregate(INJURIES,
                          by = list(YEAR = as.factor(YEAR), EVENT = EVTYPE),
                          FUN = sum, na.rm = TRUE)),
  c("YEAR", "EVENT", "TOTAL")
)
# Rank event types by total fatalities over the whole record, largest first.
datasummaryfatalittotal <- aggregate(datanew$FATALITIES,
                                     by = list(EVENT = datanew$EVTYPE),
                                     FUN = sum, na.rm = TRUE)
names(datasummaryfatalittotal) <- c("EVENT", "TOTAL")
datasummaryfatalittotal <- datasummaryfatalittotal[
  order(datasummaryfatalittotal$TOTAL, decreasing = TRUE), ]
library(xtable)
# head() instead of [1:10, ] so a table with fewer than ten event types is
# not padded with all-NA rows.
fatalitiTable <- xtable(head(datasummaryfatalittotal, 10))
print(fatalitiTable, type = "html")
## <!-- html table generated in R 3.2.3 by xtable 1.8-2 package -->
## <!-- Sat May 14 12:14:49 2016 -->
## <table border=1>
## <tr> <th> </th> <th> EVENT </th> <th> TOTAL </th> </tr>
## <tr> <td align="right"> 834 </td> <td> TORNADO </td> <td align="right"> 5633.00 </td> </tr>
## <tr> <td align="right"> 130 </td> <td> EXCESSIVE HEAT </td> <td align="right"> 1903.00 </td> </tr>
## <tr> <td align="right"> 153 </td> <td> FLASH FLOOD </td> <td align="right"> 978.00 </td> </tr>
## <tr> <td align="right"> 275 </td> <td> HEAT </td> <td align="right"> 937.00 </td> </tr>
## <tr> <td align="right"> 464 </td> <td> LIGHTNING </td> <td align="right"> 816.00 </td> </tr>
## <tr> <td align="right"> 856 </td> <td> TSTM WIND </td> <td align="right"> 504.00 </td> </tr>
## <tr> <td align="right"> 170 </td> <td> FLOOD </td> <td align="right"> 470.00 </td> </tr>
## <tr> <td align="right"> 585 </td> <td> RIP CURRENT </td> <td align="right"> 368.00 </td> </tr>
## <tr> <td align="right"> 359 </td> <td> HIGH WIND </td> <td align="right"> 248.00 </td> </tr>
## <tr> <td align="right"> 19 </td> <td> AVALANCHE </td> <td align="right"> 224.00 </td> </tr>
## </table>
# Yearly fatality series restricted to the ten top-ranked event types and to
# years from 1990 onwards. YEAR is a factor of year strings, so it is
# converted to integer for a numeric (not lexical) comparison.
datasummaryfatalittop <- datasummaryfatalit[
  datasummaryfatalit$EVENT %in% head(datasummaryfatalittotal$EVENT, 10) &
    as.integer(as.character(datasummaryfatalit$YEAR)) >= 1990L, ]
# Rank event types by total injuries over the whole record, largest first.
datasummaryinjuriestotal <- aggregate(datanew$INJURIES,
                                      by = list(EVENT = datanew$EVTYPE),
                                      FUN = sum, na.rm = TRUE)
names(datasummaryinjuriestotal) <- c("EVENT", "TOTAL")
datasummaryinjuriestotal <- datasummaryinjuriestotal[
  order(datasummaryinjuriestotal$TOTAL, decreasing = TRUE), ]
library(xtable)
# head() instead of [1:10, ] so a table with fewer than ten event types is
# not padded with all-NA rows.
injuriesTable <- xtable(head(datasummaryinjuriestotal, 10))
print(injuriesTable, type = "html")
## <!-- html table generated in R 3.2.3 by xtable 1.8-2 package -->
## <!-- Sat May 14 12:14:51 2016 -->
## <table border=1>
## <tr> <th> </th> <th> EVENT </th> <th> TOTAL </th> </tr>
## <tr> <td align="right"> 834 </td> <td> TORNADO </td> <td align="right"> 91346.00 </td> </tr>
## <tr> <td align="right"> 856 </td> <td> TSTM WIND </td> <td align="right"> 6957.00 </td> </tr>
## <tr> <td align="right"> 170 </td> <td> FLOOD </td> <td align="right"> 6789.00 </td> </tr>
## <tr> <td align="right"> 130 </td> <td> EXCESSIVE HEAT </td> <td align="right"> 6525.00 </td> </tr>
## <tr> <td align="right"> 464 </td> <td> LIGHTNING </td> <td align="right"> 5230.00 </td> </tr>
## <tr> <td align="right"> 275 </td> <td> HEAT </td> <td align="right"> 2100.00 </td> </tr>
## <tr> <td align="right"> 427 </td> <td> ICE STORM </td> <td align="right"> 1975.00 </td> </tr>
## <tr> <td align="right"> 153 </td> <td> FLASH FLOOD </td> <td align="right"> 1777.00 </td> </tr>
## <tr> <td align="right"> 760 </td> <td> THUNDERSTORM WIND </td> <td align="right"> 1488.00 </td> </tr>
## <tr> <td align="right"> 244 </td> <td> HAIL </td> <td align="right"> 1361.00 </td> </tr>
## </table>
# Yearly injury series restricted to the ten top-ranked event types and to
# years from 1990 onwards. YEAR is a factor of year strings, so it is
# converted to integer for a numeric (not lexical) comparison.
datasummaryinjuriestop <- datasummaryinjuries[
  datasummaryinjuries$EVENT %in% head(datasummaryinjuriestotal$EVENT, 10) &
    as.integer(as.character(datasummaryinjuries$YEAR)) >= 1990L, ]
# Rank event types by total crop damage over the whole record, largest first.
datasummarycroptotal <- aggregate(datanew$CROPDMGNEW,
                                  by = list(EVENT = datanew$EVTYPE),
                                  FUN = sum, na.rm = TRUE)
names(datasummarycroptotal) <- c("EVENT", "TOTAL")
datasummarycroptotal <- datasummarycroptotal[
  order(datasummarycroptotal$TOTAL, decreasing = TRUE), ]
library(xtable)
# head() instead of [1:10, ] so a table with fewer than ten event types is
# not padded with all-NA rows.
cropTable <- xtable(head(datasummarycroptotal, 10))
print(cropTable, type = "html")
## <!-- html table generated in R 3.2.3 by xtable 1.8-2 package -->
## <!-- Sat May 14 12:14:53 2016 -->
## <table border=1>
## <tr> <th> </th> <th> EVENT </th> <th> TOTAL </th> </tr>
## <tr> <td align="right"> 95 </td> <td> DROUGHT </td> <td align="right"> 12472566001.50 </td> </tr>
## <tr> <td align="right"> 170 </td> <td> FLOOD </td> <td align="right"> 5661968450.00 </td> </tr>
## <tr> <td align="right"> 244 </td> <td> HAIL </td> <td align="right"> 3025579173.00 </td> </tr>
## <tr> <td align="right"> 402 </td> <td> HURRICANE </td> <td align="right"> 2741910000.00 </td> </tr>
## <tr> <td align="right"> 153 </td> <td> FLASH FLOOD </td> <td align="right"> 1421317100.00 </td> </tr>
## <tr> <td align="right"> 140 </td> <td> EXTREME COLD </td> <td align="right"> 1292973000.00 </td> </tr>
## <tr> <td align="right"> 411 </td> <td> HURRICANE/TYPHOON </td> <td align="right"> 1097872801.51 </td> </tr>
## <tr> <td align="right"> 212 </td> <td> FROST/FREEZE </td> <td align="right"> 1094086000.00 </td> </tr>
## <tr> <td align="right"> 290 </td> <td> HEAVY RAIN </td> <td align="right"> 733399800.00 </td> </tr>
## <tr> <td align="right"> 848 </td> <td> TROPICAL STORM </td> <td align="right"> 678346000.00 </td> </tr>
## </table>
# Yearly crop-damage series restricted to the ten top-ranked event types.
# Unlike the other series this one is filtered on a positive yearly total
# rather than on a 1990 year cutoff.
datasummarycroptop <- datasummarycrop[
  datasummarycrop$EVENT %in% head(datasummarycroptotal$EVENT, 10) &
    datasummarycrop$TOTAL > 0, ]
# Rank event types by total property damage over the whole record, largest
# first.
datasummaryproptotal <- aggregate(datanew$PROPDMGNEW,
                                  by = list(EVENT = datanew$EVTYPE),
                                  FUN = sum, na.rm = TRUE)
names(datasummaryproptotal) <- c("EVENT", "TOTAL")
datasummaryproptotal <- datasummaryproptotal[
  order(datasummaryproptotal$TOTAL, decreasing = TRUE), ]
library(xtable)
# head() instead of [1:10, ] so a table with fewer than ten event types is
# not padded with all-NA rows.
propTable <- xtable(head(datasummaryproptotal, 10))
print(propTable, type = "html")
## <!-- html table generated in R 3.2.3 by xtable 1.8-2 package -->
## <!-- Sat May 14 12:14:54 2016 -->
## <table border=1>
## <tr> <th> </th> <th> EVENT </th> <th> TOTAL </th> </tr>
## <tr> <td align="right"> 834 </td> <td> TORNADO </td> <td align="right"> 51635880681.81 </td> </tr>
## <tr> <td align="right"> 170 </td> <td> FLOOD </td> <td align="right"> 22157709929.50 </td> </tr>
## <tr> <td align="right"> 153 </td> <td> FLASH FLOOD </td> <td align="right"> 15822673979.50 </td> </tr>
## <tr> <td align="right"> 244 </td> <td> HAIL </td> <td align="right"> 13930367514.50 </td> </tr>
## <tr> <td align="right"> 402 </td> <td> HURRICANE </td> <td align="right"> 6168319015.70 </td> </tr>
## <tr> <td align="right"> 856 </td> <td> TSTM WIND </td> <td align="right"> 4484928495.00 </td> </tr>
## <tr> <td align="right"> 359 </td> <td> HIGH WIND </td> <td align="right"> 3970046296.30 </td> </tr>
## <tr> <td align="right"> 427 </td> <td> ICE STORM </td> <td align="right"> 3944927860.00 </td> </tr>
## <tr> <td align="right"> 411 </td> <td> HURRICANE/TYPHOON </td> <td align="right"> 3805840065.50 </td> </tr>
## <tr> <td align="right"> 957 </td> <td> WILDFIRE </td> <td align="right"> 3725114001.04 </td> </tr>
## </table>
# Yearly property-damage series restricted to the ten top-ranked event types
# and to years from 1990 onwards. YEAR is a factor of year strings, so it is
# converted to integer for a numeric (not lexical) comparison.
datasummaryproptop <- datasummaryprop[
  datasummaryprop$EVENT %in% head(datasummaryproptotal$EVENT, 10) &
    as.integer(as.character(datasummaryprop$YEAR)) >= 1990L, ]
In this plot we can see the total number of fatalities and injuries caused by severe weather events in the United States.
library(ggplot2)
library(grid)
library(gridExtra)
# Yearly fatalities, one coloured line per top-ranked event type.
# YEAR is a factor, so the x axis is discrete and its breaks are supplied as
# character labels (every fifth year) instead of relying on numeric values
# being coerced to match the factor levels.
p1 <- ggplot(datasummaryfatalittop, aes(YEAR, TOTAL)) +
  geom_line(aes(group = EVENT, colour = EVENT)) +
  scale_x_discrete(breaks = as.character(seq(1990, 2011, by = 5))) +
  ggtitle("Figure 1. Top 10 Fatalities Events (by number of incidents)") +
  xlab("Year") + ylab("Fatalities")
# Yearly injuries, same layout; numbered separately from the panel above.
p2 <- ggplot(datasummaryinjuriestop, aes(YEAR, TOTAL)) +
  geom_line(aes(group = EVENT, colour = EVENT)) +
  scale_x_discrete(breaks = as.character(seq(1990, 2011, by = 5))) +
  ggtitle("Figure 2. Top 10 Injuries Events (by number of incidents)") +
  xlab("Year") + ylab("Injuries")
# Stack the two panels vertically under a shared heading.
grid.arrange(p1, p2, nrow = 2, ncol = 1,
             top = "The most harmful to population health")
In this plot we can see the total cost of the damage to property (in millions of dollars) and to crops (in thousands of dollars) caused by severe weather events in the United States.
library(ggplot2)
library(grid)
library(gridExtra)
# Yearly property damage in millions (TOTAL / 1e6), one coloured line per
# top-ranked event type.
# YEAR is a factor, so the x axis is discrete and its breaks are supplied as
# character labels (every fifth year) instead of relying on numeric values
# being coerced to match the factor levels.
p1 <- ggplot(datasummaryproptop, aes(YEAR, TOTAL / 1000000)) +
  geom_line(aes(group = EVENT, colour = EVENT)) +
  scale_x_discrete(breaks = as.character(seq(1990, 2011, by = 5))) +
  ggtitle("Figure 3. Top 10 Events (by total amount of properties)") +
  xlab("Year") + ylab("Amount of properties (millions)")
# Yearly crop damage in thousands (TOTAL / 1e3); the axis label states the
# unit that the division actually produces.
p2 <- ggplot(datasummarycroptop, aes(YEAR, TOTAL / 1000)) +
  geom_line(aes(group = EVENT, colour = EVENT)) +
  scale_x_discrete(breaks = as.character(seq(1990, 2011, by = 5))) +
  ggtitle("Figure 4. Top 10 Events (by total amount of crop)") +
  xlab("Year") + ylab("Amount of crops (thousands)")
# Stack the two panels vertically under a shared heading.
grid.arrange(p1, p2, nrow = 2, ncol = 1,
             top = "The greatest economic consequences")