Synopsis

This project explores the NOAA Storm Database and answers some basic questions about severe weather events. These events occurred across the United States and were documented according to the National Weather Service Instruction dated August 17, 2007. The priority is to study the events that have the greatest economic consequences and those that are most harmful with respect to population health, in order to show the trend of the storms over time. RStudio was used to present the results in Rmd format.

Data Processing

1.Load the data

# Read the storm CSV straight from the bzip2 archive via a bzfile() connection
# (no separate extraction step). The first line is used as the header.
data <- read.csv(
  file = bzfile(description = "repdata-data-StormData.csv.bz2"),
  header = TRUE
)

Create a new year field for use in the statistics

# Parse the leading month/day/year part of BGN_DATE into date-time values.
dateformat <- strptime(x = data$BGN_DATE, format = "%m/%d/%Y")

# Store the four-digit year of each event (as text) in a new YEAR column.
data$YEAR <- format(x = dateformat, format = "%Y")

2.The property/crop damage values are stored together with magnitude exponents.

Calculate the total dollar amounts of property/crop damage by applying those exponents.

Create a new field for each one.

## Load the package
library(sqldf)

summary(data$PROPDMGEXP)
##             -      ?      +      0      1      2      3      4      5 
## 465934      1      8      5    216     25     13      4      4     28 
##      6      7      8      B      h      H      K      m      M 
##      4      5      1     40      1      6 424665      7  11330
# Convert PROPDMG to a dollar amount by applying the PROPDMGEXP magnitude code.
# Letter codes are matched case-insensitively via UPPER():
#   H = hundreds, K = thousands, M = millions, B = billions.
# A digit d is read as a power of ten (10^d). Any other code ("", "-", "?",
# "+") leaves the value unscaled.
# Fixes relative to the earlier mapping: "m" was scaled by 0.001 instead of
# 1e6, and "B" was not handled at all (billions were counted as units).
# String literals use single quotes, the standard SQL form.
# NOTE: tables and figures rendered with the earlier mapping must be
# regenerated after this change.
PROPDMGNEW <- sqldf("select PROPDMG*(CASE UPPER(PROPDMGEXP)
                                      WHEN 'H' THEN 100
                                      WHEN 'K' THEN 1000
                                      WHEN 'M' THEN 1000000
                                      WHEN 'B' THEN 1000000000
                                      WHEN '0' THEN 1
                                      WHEN '1' THEN 10
                                      WHEN '2' THEN 100
                                      WHEN '3' THEN 1000
                                      WHEN '4' THEN 10000
                                      WHEN '5' THEN 100000
                                      WHEN '6' THEN 1000000
                                      WHEN '7' THEN 10000000
                                      WHEN '8' THEN 100000000
                                      ELSE 1 END) AS PROPDMGNEW FROM data")

summary(data$CROPDMGEXP)
##             ?      0      2      B      k      K      m      M 
## 618413      7     19      1      9     21 281832      1   1994
# Convert CROPDMG to a dollar amount by applying the CROPDMGEXP magnitude code.
# Letter codes are matched case-insensitively via UPPER():
#   H = hundreds, K = thousands, M = millions, B = billions.
# A digit d is read as a power of ten (10^d). Any other code ("", "?")
# leaves the value unscaled.
# Fixes relative to the earlier mapping: "k" was scaled by 100 instead of
# 1000, "m" by 0.001 instead of 1e6, and "B" was not handled at all.
# String literals use single quotes, the standard SQL form.
# NOTE: tables and figures rendered with the earlier mapping must be
# regenerated after this change.
CROPDMGNEW <- sqldf("select CROPDMG*(CASE UPPER(CROPDMGEXP)
                                      WHEN 'H' THEN 100
                                      WHEN 'K' THEN 1000
                                      WHEN 'M' THEN 1000000
                                      WHEN 'B' THEN 1000000000
                                      WHEN '0' THEN 1
                                      WHEN '1' THEN 10
                                      WHEN '2' THEN 100
                                      WHEN '3' THEN 1000
                                      WHEN '4' THEN 10000
                                      WHEN '5' THEN 100000
                                      WHEN '6' THEN 1000000
                                      WHEN '7' THEN 10000000
                                      WHEN '8' THEN 100000000
                                      ELSE 1 END) AS CROPDMGNEW FROM data")
# Give the column of each damage query result a readable name.
names(PROPDMGNEW) <- "PROPDMGNEW"
names(CROPDMGNEW) <- "CROPDMGNEW"

# Append both computed damage columns to the raw storm data.
datanew <- data.frame(data, PROPDMGNEW, CROPDMGNEW)

3.Summary of the quantity of fatalities and injuries and the amount of crop and property damage, by EVTYPE and YEAR

# Each summary below sums one measure per (YEAR, EVTYPE) pair, ignoring NA
# values, and labels the result columns YEAR, EVENT and TOTAL.

# Crop damage amount per year and event type
datasummarycrop <- setNames(
  aggregate(
    datanew$CROPDMGNEW,
    by = list(YEAR = as.factor(datanew$YEAR), EVENT = datanew$EVTYPE),
    FUN = sum, na.rm = TRUE
  ),
  c("YEAR", "EVENT", "TOTAL")
)

# Property damage amount per year and event type
datasummaryprop <- setNames(
  aggregate(
    datanew$PROPDMGNEW,
    by = list(YEAR = as.factor(datanew$YEAR), EVENT = datanew$EVTYPE),
    FUN = sum, na.rm = TRUE
  ),
  c("YEAR", "EVENT", "TOTAL")
)

# Number of fatalities per year and event type
datasummaryfatalit <- setNames(
  aggregate(
    datanew$FATALITIES,
    by = list(YEAR = as.factor(datanew$YEAR), EVENT = datanew$EVTYPE),
    FUN = sum, na.rm = TRUE
  ),
  c("YEAR", "EVENT", "TOTAL")
)

# Number of injuries per year and event type
datasummaryinjuries <- setNames(
  aggregate(
    datanew$INJURIES,
    by = list(YEAR = as.factor(datanew$YEAR), EVENT = datanew$EVTYPE),
    FUN = sum, na.rm = TRUE
  ),
  c("YEAR", "EVENT", "TOTAL")
)

4.Identify the 10 event types most harmful with respect to population health

# Top 10 event types by total fatalities

# Total fatalities per event type over the whole data set (NA values ignored).
datasummaryfatalittotal <- aggregate(datanew$FATALITIES, by = list(EVENT = datanew$EVTYPE), FUN = sum, na.rm = TRUE)
names(datasummaryfatalittotal) <- c("EVENT", "TOTAL")

# Rank event types from most to fewest fatalities.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
datasummaryfatalittotal <- datasummaryfatalittotal[order(datasummaryfatalittotal$TOTAL, decreasing = TRUE), ]

library(xtable)
# head() returns at most 10 rows, so no NA padding rows are produced when
# fewer than 10 event types exist.
fatalitiTable <- xtable(head(datasummaryfatalittotal, 10))
print(fatalitiTable, type = "html")
## <!-- html table generated in R 3.2.3 by xtable 1.8-2 package -->
## <!-- Sat May 14 12:14:49 2016 -->
## <table border=1>
## <tr> <th>  </th> <th> EVENT </th> <th> TOTAL </th>  </tr>
##   <tr> <td align="right"> 834 </td> <td> TORNADO </td> <td align="right"> 5633.00 </td> </tr>
##   <tr> <td align="right"> 130 </td> <td> EXCESSIVE HEAT </td> <td align="right"> 1903.00 </td> </tr>
##   <tr> <td align="right"> 153 </td> <td> FLASH FLOOD </td> <td align="right"> 978.00 </td> </tr>
##   <tr> <td align="right"> 275 </td> <td> HEAT </td> <td align="right"> 937.00 </td> </tr>
##   <tr> <td align="right"> 464 </td> <td> LIGHTNING </td> <td align="right"> 816.00 </td> </tr>
##   <tr> <td align="right"> 856 </td> <td> TSTM WIND </td> <td align="right"> 504.00 </td> </tr>
##   <tr> <td align="right"> 170 </td> <td> FLOOD </td> <td align="right"> 470.00 </td> </tr>
##   <tr> <td align="right"> 585 </td> <td> RIP CURRENT </td> <td align="right"> 368.00 </td> </tr>
##   <tr> <td align="right"> 359 </td> <td> HIGH WIND </td> <td align="right"> 248.00 </td> </tr>
##   <tr> <td align="right"> 19 </td> <td> AVALANCHE </td> <td align="right"> 224.00 </td> </tr>
##    </table>
# Yearly fatality rows for the first 10 event types of the ranked totals,
# restricted to 1990 onwards. Years are compared as integers rather than as
# strings, and head() avoids NA entries if fewer than 10 event types exist.
datasummaryfatalittop <- datasummaryfatalit[
  datasummaryfatalit$EVENT %in% head(datasummaryfatalittotal$EVENT, 10) &
    as.integer(as.character(datasummaryfatalit$YEAR)) >= 1990L,
]



# Top 10 event types by total injuries

# Total injuries per event type over the whole data set (NA values ignored).
datasummaryinjuriestotal <- aggregate(datanew$INJURIES, by = list(EVENT = datanew$EVTYPE), FUN = sum, na.rm = TRUE)
names(datasummaryinjuriestotal) <- c("EVENT", "TOTAL")

# Rank event types from most to fewest injuries.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
datasummaryinjuriestotal <- datasummaryinjuriestotal[order(datasummaryinjuriestotal$TOTAL, decreasing = TRUE), ]

library(xtable)
# head() returns at most 10 rows, so no NA padding rows are produced when
# fewer than 10 event types exist.
injuriesTable <- xtable(head(datasummaryinjuriestotal, 10))
print(injuriesTable, type = "html")
## <!-- html table generated in R 3.2.3 by xtable 1.8-2 package -->
## <!-- Sat May 14 12:14:51 2016 -->
## <table border=1>
## <tr> <th>  </th> <th> EVENT </th> <th> TOTAL </th>  </tr>
##   <tr> <td align="right"> 834 </td> <td> TORNADO </td> <td align="right"> 91346.00 </td> </tr>
##   <tr> <td align="right"> 856 </td> <td> TSTM WIND </td> <td align="right"> 6957.00 </td> </tr>
##   <tr> <td align="right"> 170 </td> <td> FLOOD </td> <td align="right"> 6789.00 </td> </tr>
##   <tr> <td align="right"> 130 </td> <td> EXCESSIVE HEAT </td> <td align="right"> 6525.00 </td> </tr>
##   <tr> <td align="right"> 464 </td> <td> LIGHTNING </td> <td align="right"> 5230.00 </td> </tr>
##   <tr> <td align="right"> 275 </td> <td> HEAT </td> <td align="right"> 2100.00 </td> </tr>
##   <tr> <td align="right"> 427 </td> <td> ICE STORM </td> <td align="right"> 1975.00 </td> </tr>
##   <tr> <td align="right"> 153 </td> <td> FLASH FLOOD </td> <td align="right"> 1777.00 </td> </tr>
##   <tr> <td align="right"> 760 </td> <td> THUNDERSTORM WIND </td> <td align="right"> 1488.00 </td> </tr>
##   <tr> <td align="right"> 244 </td> <td> HAIL </td> <td align="right"> 1361.00 </td> </tr>
##    </table>
# Yearly injury rows for the first 10 event types of the ranked totals,
# restricted to 1990 onwards. Years are compared as integers rather than as
# strings, and head() avoids NA entries if fewer than 10 event types exist.
datasummaryinjuriestop <- datasummaryinjuries[
  datasummaryinjuries$EVENT %in% head(datasummaryinjuriestotal$EVENT, 10) &
    as.integer(as.character(datasummaryinjuries$YEAR)) >= 1990L,
]

5.Identify the top 10 event types with the greatest economic consequences

# Top 10 event types by total crop damage

# Total crop damage per event type over the whole data set (NA values ignored).
datasummarycroptotal <- aggregate(datanew$CROPDMGNEW, by = list(EVENT = datanew$EVTYPE), FUN = sum, na.rm = TRUE)
names(datasummarycroptotal) <- c("EVENT", "TOTAL")

# Rank event types from largest to smallest crop damage.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
datasummarycroptotal <- datasummarycroptotal[order(datasummarycroptotal$TOTAL, decreasing = TRUE), ]

library(xtable)
# head() returns at most 10 rows, so no NA padding rows are produced when
# fewer than 10 event types exist.
cropTable <- xtable(head(datasummarycroptotal, 10))
print(cropTable, type = "html")
## <!-- html table generated in R 3.2.3 by xtable 1.8-2 package -->
## <!-- Sat May 14 12:14:53 2016 -->
## <table border=1>
## <tr> <th>  </th> <th> EVENT </th> <th> TOTAL </th>  </tr>
##   <tr> <td align="right"> 95 </td> <td> DROUGHT </td> <td align="right"> 12472566001.50 </td> </tr>
##   <tr> <td align="right"> 170 </td> <td> FLOOD </td> <td align="right"> 5661968450.00 </td> </tr>
##   <tr> <td align="right"> 244 </td> <td> HAIL </td> <td align="right"> 3025579173.00 </td> </tr>
##   <tr> <td align="right"> 402 </td> <td> HURRICANE </td> <td align="right"> 2741910000.00 </td> </tr>
##   <tr> <td align="right"> 153 </td> <td> FLASH FLOOD </td> <td align="right"> 1421317100.00 </td> </tr>
##   <tr> <td align="right"> 140 </td> <td> EXTREME COLD </td> <td align="right"> 1292973000.00 </td> </tr>
##   <tr> <td align="right"> 411 </td> <td> HURRICANE/TYPHOON </td> <td align="right"> 1097872801.51 </td> </tr>
##   <tr> <td align="right"> 212 </td> <td> FROST/FREEZE </td> <td align="right"> 1094086000.00 </td> </tr>
##   <tr> <td align="right"> 290 </td> <td> HEAVY RAIN </td> <td align="right"> 733399800.00 </td> </tr>
##   <tr> <td align="right"> 848 </td> <td> TROPICAL STORM </td> <td align="right"> 678346000.00 </td> </tr>
##    </table>
# Yearly crop-damage rows for the first 10 event types of the ranked totals,
# keeping only rows with a positive total (no year cut-off is applied here).
datasummarycroptop <- datasummarycrop[
  datasummarycrop$TOTAL > 0 &
    datasummarycrop$EVENT %in% datasummarycroptotal$EVENT[1:10],
]




# Top 10 event types by total property damage

# Total property damage per event type over the whole data set (NA values ignored).
datasummaryproptotal <- aggregate(datanew$PROPDMGNEW, by = list(EVENT = datanew$EVTYPE), FUN = sum, na.rm = TRUE)
names(datasummaryproptotal) <- c("EVENT", "TOTAL")

# Rank event types from largest to smallest property damage.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
datasummaryproptotal <- datasummaryproptotal[order(datasummaryproptotal$TOTAL, decreasing = TRUE), ]

library(xtable)
# head() returns at most 10 rows, so no NA padding rows are produced when
# fewer than 10 event types exist.
propTable <- xtable(head(datasummaryproptotal, 10))
print(propTable, type = "html")
## <!-- html table generated in R 3.2.3 by xtable 1.8-2 package -->
## <!-- Sat May 14 12:14:54 2016 -->
## <table border=1>
## <tr> <th>  </th> <th> EVENT </th> <th> TOTAL </th>  </tr>
##   <tr> <td align="right"> 834 </td> <td> TORNADO </td> <td align="right"> 51635880681.81 </td> </tr>
##   <tr> <td align="right"> 170 </td> <td> FLOOD </td> <td align="right"> 22157709929.50 </td> </tr>
##   <tr> <td align="right"> 153 </td> <td> FLASH FLOOD </td> <td align="right"> 15822673979.50 </td> </tr>
##   <tr> <td align="right"> 244 </td> <td> HAIL </td> <td align="right"> 13930367514.50 </td> </tr>
##   <tr> <td align="right"> 402 </td> <td> HURRICANE </td> <td align="right"> 6168319015.70 </td> </tr>
##   <tr> <td align="right"> 856 </td> <td> TSTM WIND </td> <td align="right"> 4484928495.00 </td> </tr>
##   <tr> <td align="right"> 359 </td> <td> HIGH WIND </td> <td align="right"> 3970046296.30 </td> </tr>
##   <tr> <td align="right"> 427 </td> <td> ICE STORM </td> <td align="right"> 3944927860.00 </td> </tr>
##   <tr> <td align="right"> 411 </td> <td> HURRICANE/TYPHOON </td> <td align="right"> 3805840065.50 </td> </tr>
##   <tr> <td align="right"> 957 </td> <td> WILDFIRE </td> <td align="right"> 3725114001.04 </td> </tr>
##    </table>
# Yearly property-damage rows for the first 10 event types of the ranked
# totals, restricted to 1990 onwards. Years are compared as integers rather
# than as strings, and head() avoids NA entries if fewer than 10 event types
# exist.
datasummaryproptop <- datasummaryprop[
  datasummaryprop$EVENT %in% head(datasummaryproptotal$EVENT, 10) &
    as.integer(as.character(datasummaryprop$YEAR)) >= 1990L,
]

Result

1.The most harmful with respect to population health

In this plot we can see the total fatalities and injuries caused by severe weather events in the United States.

library(ggplot2)
library(grid)
library(gridExtra)


# Upper panel: yearly fatalities, one coloured line per event type.
# The x scale is discrete, so the breaks (every fifth year from 1990) are
# passed as character labels instead of numbers.
p1 <- ggplot(datasummaryfatalittop, aes(YEAR, TOTAL)) +
  geom_line(aes(group = EVENT, colour = EVENT)) +
  scale_x_discrete(breaks = as.character(seq(1990, 2011, by = 5))) +
  ggtitle("Figure 1. Top 10 Fatalities Events (by number of incidents)") +
  xlab("Year") +
  ylab("Fatalities")

# Lower panel: yearly injuries, drawn the same way.
p2 <- ggplot(datasummaryinjuriestop, aes(YEAR, TOTAL)) +
  geom_line(aes(group = EVENT, colour = EVENT)) +
  scale_x_discrete(breaks = as.character(seq(1990, 2011, by = 5))) +
  ggtitle("Figure 1. Top 10 Injuries Events (by number of incidents)") +
  xlab("Year") +
  ylab("Injuries")

# Stack both panels in a single column under a shared title.
grid.arrange(p1, p2, nrow = 2, ncol = 1, top = "The most harmful to population health")

2.The greatest economic consequences

In this plot we can see the total cost in dollars of the damage to properties (in millions) and crops (in thousands) caused by severe weather events in the United States.

library(ggplot2)
library(grid)
library(gridExtra)
        


# Upper panel: yearly property damage in millions (TOTAL / 1e6), one coloured
# line per event type. The x scale is discrete, so the breaks (every fifth
# year from 1990) are passed as character labels instead of numbers.
p1 <- ggplot(datasummaryproptop, aes(YEAR, TOTAL / 1000000)) +
  geom_line(aes(group = EVENT, colour = EVENT)) +
  scale_x_discrete(breaks = as.character(seq(1990, 2011, by = 5))) +
  ggtitle("Figure 2. Top 10 Events (by total amount of properties)") +
  xlab("Year") +
  ylab("Property damage (millions of USD)")

# Lower panel: yearly crop damage in thousands (TOTAL / 1e3), drawn the same
# way. The y label states the unit as "thousands" to match the division.
p2 <- ggplot(datasummarycroptop, aes(YEAR, TOTAL / 1000)) +
  geom_line(aes(group = EVENT, colour = EVENT)) +
  scale_x_discrete(breaks = as.character(seq(1990, 2011, by = 5))) +
  ggtitle("Figure 2. Top 10 Events (by total amount of crop)") +
  xlab("Year") +
  ylab("Crop damage (thousands of USD)")

# Stack both panels in a single column under a shared title.
grid.arrange(p1, p2, nrow = 2, ncol = 1, top = "The greatest economic consequences")