PEER ASSIGNMENT - PROCESSING USA STORM DATA AND ITS EFFECTS ON THE ECONOMY

Synopsis

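This report first loads the libraries and packages needed to analyze and visualize the storm data, then loads and processes the data for analysis. Processing converts the damage exponent codes (the unit of each damage figure) into numeric powers of ten, so that property and crop damage can be totalled by event type. We use the processed data to answer two questions: which types of events are most harmful to population health, and which types of events have the greatest economic consequences. Finally, the results for each question are visualized in their own section.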

library(data.table)
library(ggplot2)
library(grid)
library(gridExtra)
library(knitr)

# Point the session at the assignment folder; the relative path "data.csv"
# read further down is resolved against this directory.
# NOTE(review): the path is hard-coded to one machine -- adjust it before
# running the report elsewhere.
# getwd()
setwd("~/Desktop/ReproducibleResearchAssignment2")

# Record the R version, platform and attached packages used for this run.
sessionInfo()
## R version 3.1.0 (2014-04-10)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
## 
## locale:
## [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] gridExtra_0.9.1  ggplot2_1.0.0    data.table_1.9.2 knitr_1.6       
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.2-4 digest_0.6.4     evaluate_0.5.5   formatR_0.10    
##  [5] gtable_0.1.2     MASS_7.3-33      munsell_0.4.2    plyr_1.8.1      
##  [9] proto_0.3-10     Rcpp_0.11.2      reshape2_1.4     scales_0.2.4    
## [13] stringr_0.6.2    tools_3.1.0

Load Storm Data

# Read the raw storm records from the CSV file. Text columns are kept as
# character vectors (not factors) so they can be compared as plain strings.
data <- read.csv(
  file = "data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

Look at Storm Data

# Preview the first six rows to check the column layout.
head(data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

Data Processing

# Convert the damage exponent codes into numeric powers of ten, then combine
# them with the PROPDMG / CROPDMG values to get damage amounts as plain
# numbers. The results are stored in new columns:
#   numericalPROPDMGEXP, numericalPropDmg, numericalCROPDMGEXP, numericalCropDmg
#
# Exponent codes handled:
#   B = billion  (10^9)
#   M = million  (10^6)
#   K = thousand (10^3)
#   H = hundred  (10^2)
#   a string of digits is used directly as the power
# Any other code (blank or an unrecognised symbol) maps to power 0, i.e. the
# damage value is taken as-is.
damage_exponent <- function(codes) {
  # Match letters case-insensitively so that "k" and "K" are treated alike.
  codes <- toupper(as.character(codes))
  letter_powers <- c(H = 2, K = 3, M = 6, B = 9)

  powers <- rep(0, length(codes))
  is_letter <- codes %in% names(letter_powers)
  powers[is_letter] <- unname(letter_powers[codes[is_letter]])

  # Detect digit codes with a pattern so no coercion warnings are raised and
  # nothing needs to be suppressed.
  is_digit <- grepl("^[0-9]+$", codes)
  powers[is_digit] <- as.numeric(codes[is_digit])

  powers
}

# Property damage
data$numericalPROPDMGEXP <- damage_exponent(data$PROPDMGEXP)
data$numericalPropDmg <- data$PROPDMG * 10^data$numericalPROPDMGEXP

# Apply the same conversion to crop damage
data$numericalCROPDMGEXP <- damage_exponent(data$CROPDMGEXP)
data$numericalCropDmg <- data$CROPDMG * 10^data$numericalCROPDMGEXP

# Parse the begin date. POSIXct is used because it is a plain atomic vector;
# the list-based POSIXlt returned by strptime() is awkward as a data frame
# column.
data$BGN_DATE <- as.POSIXct(data$BGN_DATE, format = "%m/%d/%Y %H:%M:%S")

Results

Q1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Plot the number of fatalities per event type (top 20 event types only).
# head() is used instead of [1:20, ] so that fewer than 20 event types do not
# produce all-NA filler rows.
agg <- aggregate(data$FATALITIES, by = list(data$EVTYPE), FUN = sum)
agg <- head(agg[order(-agg$x), ], 20)
# Fix the factor levels in ranked order so the bars stay sorted by total
# rather than alphabetically.
agg$Group.1 <- factor(agg$Group.1, levels = agg$Group.1)
# Note: `las` is a base-graphics parameter and is not used by geom_bar();
# the label rotation is done through theme(axis.text.x = ...).
g1 <- ggplot(agg, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Count") +
  ggtitle("Number of fatalities in each event type")

# Plot the number of injuries per event type (top 20 event types only).
agg <- aggregate(data$INJURIES, by = list(data$EVTYPE), FUN = sum)
agg <- head(agg[order(-agg$x), ], 20)
agg$Group.1 <- factor(agg$Group.1, levels = agg$Group.1)
g2 <- ggplot(agg, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Count") +
  ggtitle("Number of injuries in each event type")

# Stack the two charts vertically.
grid.arrange(g1, g2, ncol = 1)

plot of chunk unnamed-chunk-7

Q2: Across the United States, which types of events have the greatest economic consequences?

# Plot total property damage per event type (top 20 event types only).
# The grouping variable is the EVTYPE column itself; wrapping it in `?`()
# invoked the help operator and made aggregate() fail.
agg <- aggregate(data$numericalPropDmg, by = list(data$EVTYPE), FUN = sum)
# head() is used instead of [1:20, ] so that fewer than 20 event types do not
# produce all-NA filler rows.
agg <- head(agg[order(-agg$x), ], 20)
# Fix the factor levels in ranked order so the bars stay sorted by total
# rather than alphabetically.
agg$Group.1 <- factor(agg$Group.1, levels = agg$Group.1)
# Note: `las` is a base-graphics parameter and is not used by geom_bar();
# the label rotation is done through theme(axis.text.x = ...).
g1 <- ggplot(agg, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Damage") +
  ggtitle("Amount of property damage in each event type")

# Plot total crop damage per event type (top 20 event types only).
agg <- aggregate(data$numericalCropDmg, by = list(data$EVTYPE), FUN = sum)
agg <- head(agg[order(-agg$x), ], 20)
agg$Group.1 <- factor(agg$Group.1, levels = agg$Group.1)
g2 <- ggplot(agg, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Damage") +
  ggtitle("Amount of crop damage in each event type")

# Stack the two charts vertically.
grid.arrange(g1, g2, ncol = 1)

plot of chunk unnamed-chunk-11