Reproducible Research, Assignment 2: Impact of Severe Weather Events on Public Health and the Economy in the United States

Synopsis

The aim of this assignment is to analyze the impact of severe weather events on public health and to identify which weather events have the greatest economic consequences.

Basic settings

# Session-wide settings and the packages used by the analysis below.
echo <- TRUE  # plain variable; no code shown in this document reads it
options(scipen = 1)  # bias number printing towards fixed notation
library(ggplot2)
library(plyr)
## Warning: package 'plyr' was built under R version 3.2.2

Data Processing, download and read the data

getwd()
setwd("c:/Users/%%/Documents/ReproducibleResearchAssginment2")
download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", "ReproducibleResearchAssginment2/StormData.csv.bz2")
ls()
rm(list = ls())

# Load the storm data set (comma-separated, the read.csv default) and take a
# first look at its dimensions, its column names and its first four rows.
data <- read.csv("stormData.csv")
dim(data)
## [1] 902297     37
names(data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
head(data, n = 4)
##   STATE__          BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1 4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1 4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1 2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1  6/8/1951 0:00:00     0900       CST     89    MADISON    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4

Get the holistic view on the data, create a histogram

# Derive the calendar year of every event from BGN_DATE (month/day/year plus a
# time part) and store it in a new numeric `year` column. The guard makes the
# step safe to re-run: the column is only added when it does not exist yet,
# independent of how many columns the data frame happens to have.
if (!"year" %in% names(data)) {
  data$year <- as.numeric(
    format(as.Date(data$BGN_DATE, format = "%m/%d/%Y %H:%M:%S"), "%Y")
  )
}


# Histogram of the number of recorded events per year.
hist(
  data$year,
  breaks = 30,
  main = "Tracked Weather Events",
  xlab = "Year",
  col = "darkgreen"
)

Insights: The number of tracked weather events starts increasing around 1995. The analysis therefore uses the subset of the data starting from 1995, which, given the volume of available data, should provide statistically significant insights.

# Restrict the analysis to events recorded from 1995 onwards and report the
# size of the resulting subset.
storm <- data[data[["year"]] >= 1995, , drop = FALSE]
dim(storm)
## [1] 681500     38

Severe weather events and their impact on public health

Get the number of injuries and fatalities caused by severe weather events, and keep the top 15 severe weather event types.

# Sum one numeric column per event type and return the event types with the
# largest totals.
#
# NOTE: this function shares its name with base::sort() and masks it in the
# global environment; the name is kept because the rest of the script calls it.
#
# Arguments:
#   fieldName  name (character) of the column in `dataset` to sum per EVTYPE.
#   top        maximum number of event types to return (default 15).
#   dataset    data frame containing an EVTYPE column and the `fieldName`
#              column (defaults to the global `data`).
#
# Returns a data frame with the columns EVTYPE and <fieldName>, sorted by
# descending total, with row names 1..n. EVTYPE is a factor whose levels follow
# that same descending order, so plots keep the ranking on the x axis.
sort <- function(fieldName, top = 15, dataset = data) {
  # Fail early with a clear message instead of an obscure aggregate() error.
  missing_cols <- setdiff(c("EVTYPE", fieldName), colnames(dataset))
  if (length(missing_cols) > 0) {
    stop(
      "column(s) not found in dataset: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  field <- aggregate(
    dataset[[fieldName]],
    by = list(dataset$EVTYPE),
    FUN = sum
  )
  names(field) <- c("EVTYPE", fieldName)

  # Largest totals first; ties keep the alphabetical order from aggregate().
  field <- field[order(field[[fieldName]], decreasing = TRUE), , drop = FALSE]
  rownames(field) <- NULL
  field <- head(field, n = top)

  # Fix the factor levels to the ranking so they are not re-sorted later.
  field$EVTYPE <- factor(field$EVTYPE, levels = as.character(field$EVTYPE))
  field
}

# Event types with the highest fatality and injury totals since 1995
# (sort() returns its default of 15 rows each).
fatalities <- sort(fieldName = "FATALITIES", dataset = storm)
injuries <- sort(fieldName = "INJURIES", dataset = storm)

Severe weather events and their impact on the economy

First convert crop damage and property damage into numeric values.

# Turn a damage "exponent" column into a numeric power of ten and append the
# resulting absolute damage amount as a new column.
#
# Arguments:
#   dataset       data frame to convert (defaults to the global `storm`).
#   fieldName     name of the exponent column, e.g. "PROPDMGEXP". The damage
#                 amount is read from the column immediately to its left.
#   newFieldName  name of the column that receives amount * 10^exponent.
#
# Exponent codes are matched case-insensitively: H = 2, K = 3, M = 6, B = 9,
# empty string = 0. Strings that already parse as numbers are used as the
# exponent directly. Anything else cannot be parsed, triggers R's usual
# "NAs introduced by coercion" warning and is treated as exponent 0.
#
# Returns `dataset` with the exponent column replaced by the numeric exponent
# and `newFieldName` holding the computed amount (appended as the last column
# when it does not exist yet).
converter <- function(dataset = storm, fieldName, newFieldName) {
  index <- match(fieldName, colnames(dataset))
  if (is.na(index)) {
    stop("column '", fieldName, "' not found in dataset", call. = FALSE)
  }
  if (index < 2L) {
    stop(
      "column '", fieldName, "' has no damage column to its left",
      call. = FALSE
    )
  }

  letter_exponents <- c(H = "2", K = "3", M = "6", B = "9")

  codes <- toupper(as.character(dataset[[index]]))
  is_letter <- codes %in% names(letter_exponents)
  codes[is_letter] <- unname(letter_exponents[codes[is_letter]])
  codes[!is.na(codes) & codes == ""] <- "0"

  # Unparseable symbols become NA here (with a warning) and then exponent 0.
  exponents <- as.numeric(codes)
  exponents[is.na(exponents)] <- 0

  dataset[[index]] <- exponents
  dataset[[newFieldName]] <- dataset[[index - 1L]] * 10^exponents
  dataset
}

# Add absolute damage amounts for property and crops. Each call rewrites the
# exponent column as a numeric power of ten and appends the computed amount
# (damage * 10^exponent) as a new column.
storm <- converter(storm, "PROPDMGEXP", "propertyDamage")
## Warning in converter(storm, "PROPDMGEXP", "propertyDamage"): NAs introduced
## by coercion
storm <- converter(storm, "CROPDMGEXP", "cropDamage")
## Warning in converter(storm, "CROPDMGEXP", "cropDamage"): NAs introduced
## by coercion
names(storm)
##  [1] "STATE__"        "BGN_DATE"       "BGN_TIME"       "TIME_ZONE"     
##  [5] "COUNTY"         "COUNTYNAME"     "STATE"          "EVTYPE"        
##  [9] "BGN_RANGE"      "BGN_AZI"        "BGN_LOCATI"     "END_DATE"      
## [13] "END_TIME"       "COUNTY_END"     "COUNTYENDN"     "END_RANGE"     
## [17] "END_AZI"        "END_LOCATI"     "LENGTH"         "WIDTH"         
## [21] "F"              "MAG"            "FATALITIES"     "INJURIES"      
## [25] "PROPDMG"        "PROPDMGEXP"     "CROPDMG"        "CROPDMGEXP"    
## [29] "WFO"            "STATEOFFIC"     "ZONENAMES"      "LATITUDE"      
## [33] "LONGITUDE"      "LATITUDE_E"     "LONGITUDE_"     "REMARKS"       
## [37] "REFNUM"         "year"           "propertyDamage" "cropDamage"
# Strongly penalise scientific notation so the large totals print in full.
options(scipen=999)
# Event types with the highest total property and crop damage.
property <- sort("propertyDamage", dataset = storm)
crop <- sort("cropDamage", dataset = storm)

RESULTS

Main insights FATALITIES and INJURIES: Two sorted lists split by severe weather events and by number of affected people.

# Print the event types ranked by total injuries.
injuries
##               EVTYPE INJURIES
## 1            TORNADO    21765
## 2              FLOOD     6769
## 3     EXCESSIVE HEAT     6525
## 4          LIGHTNING     4631
## 5          TSTM WIND     3630
## 6               HEAT     2030
## 7        FLASH FLOOD     1734
## 8  THUNDERSTORM WIND     1426
## 9       WINTER STORM     1298
## 10 HURRICANE/TYPHOON     1275
## 11         HIGH WIND     1093
## 12              HAIL      916
## 13          WILDFIRE      911
## 14        HEAVY SNOW      751
## 15               FOG      718
# Print the event types ranked by total fatalities.
fatalities
##               EVTYPE FATALITIES
## 1     EXCESSIVE HEAT       1903
## 2            TORNADO       1545
## 3        FLASH FLOOD        934
## 4               HEAT        924
## 5          LIGHTNING        729
## 6              FLOOD        423
## 7        RIP CURRENT        360
## 8          HIGH WIND        241
## 9          TSTM WIND        241
## 10         AVALANCHE        223
## 11      RIP CURRENTS        204
## 12      WINTER STORM        195
## 13         HEAT WAVE        161
## 14 THUNDERSTORM WIND        131
## 15      EXTREME COLD        126

Visualize the insights: total fatalities and total injuries by severe weather event.

# Bar chart of total fatalities per event type. The bar heights are the
# pre-aggregated totals in `fatalities`, hence stat = "identity"; the x axis
# follows the factor level order of EVTYPE.
fatalities_Plot <- ggplot(fatalities, aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity", colour = "darkgreen", fill = "darkgreen") +
  scale_y_continuous("Nb of Fatalities") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  xlab("Weather Type") +
  ggtitle("Total Fatalities by Severe Weather\n Events\n 1995 - 2011")

fatalities_Plot

# Bar chart of total injuries per event type. The bar heights are the
# pre-aggregated totals in `injuries` (not `fatalities`), hence
# stat = "identity"; the x axis follows the factor level order of EVTYPE.
injuries_Plot <- ggplot(injuries, aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity", colour = "darkblue", fill = "darkblue") +
  scale_y_continuous("Nb of Injuries") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  xlab("Weather Type") +
  ggtitle("Total Injuries by Severe Weather\n Events\n 1995 - 2011")

injuries_Plot

Final conclusion on fatalities and injuries: According to the data, excessive heat and tornadoes are the main causes of fatalities. Tornadoes cause the most injuries within the analyzed period 1995 - 2011.

Main insights PROPERTY and CROP: Two sorted lists split by severe weather events and by amount of damage.

# Print the event types ranked by total property damage.
property
##               EVTYPE propertyDamage
## 1              FLOOD   144022037057
## 2  HURRICANE/TYPHOON    69305840000
## 3        STORM SURGE    43193536000
## 4            TORNADO    24935939545
## 5        FLASH FLOOD    16047794571
## 6               HAIL    15048722103
## 7          HURRICANE    11812819010
## 8     TROPICAL STORM     7653335550
## 9          HIGH WIND     5259785375
## 10          WILDFIRE     4759064000
## 11  STORM SURGE/TIDE     4641188000
## 12         TSTM WIND     4482361440
## 13         ICE STORM     3643555810
## 14 THUNDERSTORM WIND     3399282992
## 15    HURRICANE OPAL     3172846000
# Print the event types ranked by total crop damage.
crop
##               EVTYPE  cropDamage
## 1            DROUGHT 13922066000
## 2              FLOOD  5422810400
## 3          HURRICANE  2741410000
## 4               HAIL  2614127070
## 5  HURRICANE/TYPHOON  2607872800
## 6        FLASH FLOOD  1343915000
## 7       EXTREME COLD  1292473000
## 8       FROST/FREEZE  1094086000
## 9         HEAVY RAIN   728399800
## 10    TROPICAL STORM   677836000
## 11         HIGH WIND   633561300
## 12         TSTM WIND   553947350
## 13    EXCESSIVE HEAT   492402000
## 14 THUNDERSTORM WIND   414354000
## 15              HEAT   401411500

Visualize the insights, total damage by severe weather events.

# Bar chart of total property damage per event type. The bar heights are the
# pre-aggregated totals in `property`, hence stat = "identity"; the x axis
# follows the factor level order of EVTYPE.
property_Plot <- ggplot(property, aes(x = EVTYPE, y = propertyDamage)) +
  geom_bar(stat = "identity", colour = "darkgreen", fill = "darkgreen") +
  scale_y_continuous("Costs Property Damage in US Dollars") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  xlab("Weather Type") +
  ggtitle("Total Costs of Property Damage by Severe Weather\n Events\n 1995 - 2011")

property_Plot

# Bar chart of total crop damage per event type. The bar heights are the
# pre-aggregated totals in `crop`, hence stat = "identity"; the x axis follows
# the factor level order of EVTYPE.
crop_Plot <- ggplot(crop, aes(x = EVTYPE, y = cropDamage)) +
  geom_bar(stat = "identity", colour = "darkblue", fill = "darkblue") +
  scale_y_continuous("Costs Crop Damage in US Dollars") +
  theme(axis.text.x = element_text(angle = 40, hjust = 1)) +
  xlab("Weather Type") +
  ggtitle("Total Costs by Severe Weather\n Events\n 1995 - 2011")

crop_Plot

Final conclusion on property and crop damage: According to the data, floods and hurricanes/typhoons generate the highest property damage costs, while droughts and floods generate the highest crop losses within the analyzed period 1995 - 2011.

CONCLUSION SUMMARY

Excessive heat and tornadoes harm the population's health the most. On the economic side, floods generate the highest property damage costs, and droughts have the highest financial impact on crops.