#Code for reading in the dataset and/or processing the data

# Packages providing the helpers used later in this script (arrange()/desc(),
# melt(), ggplot()); the script called them without ever attaching a package.
# NOTE(review): arrange()/desc() are assumed to come from dplyr and melt() from
# reshape2 -- confirm against the original session setup.
library(dplyr)
library(reshape2)
library(ggplot2)

# Fetch the compressed storm data once and reuse the local copy afterwards.
# download.file() only returns a status code, so its result is not stored in
# `dataset`; mode = "wb" keeps the bz2 archive byte-exact on Windows.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "./storm_dataset"
if (!file.exists(storm_file)) {
  download.file(storm_url, storm_file, mode = "wb")
}

# Read the CSV through a bzfile() connection so it is decompressed on the fly.
dataset <- read.csv(bzfile(storm_file))
head(dataset)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6
# Compact overview of the data frame: dimensions, column types, first values.
str(object = dataset)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# List the column names of the storm data frame.
colnames(dataset)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

#1) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

#For each event type we compare two health outcomes, i.e. injuries and fatalities.

#We aggregate each of them in turn and then compare.

#a) aggregating EVTYPE wrt injuries

# Sum INJURIES per event type, rank the event types from most to fewest
# injuries, and keep the 20 highest-ranked rows.
total_injuries <- arrange(
  aggregate(INJURIES ~ EVTYPE, data = dataset, FUN = sum),
  desc(INJURIES)
)[seq_len(20), ]
total_injuries
##                EVTYPE INJURIES
## 1             TORNADO    91346
## 2           TSTM WIND     6957
## 3               FLOOD     6789
## 4      EXCESSIVE HEAT     6525
## 5           LIGHTNING     5230
## 6                HEAT     2100
## 7           ICE STORM     1975
## 8         FLASH FLOOD     1777
## 9   THUNDERSTORM WIND     1488
## 10               HAIL     1361
## 11       WINTER STORM     1321
## 12  HURRICANE/TYPHOON     1275
## 13          HIGH WIND     1137
## 14         HEAVY SNOW     1021
## 15           WILDFIRE      911
## 16 THUNDERSTORM WINDS      908
## 17           BLIZZARD      805
## 18                FOG      734
## 19   WILD/FOREST FIRE      545
## 20         DUST STORM      440

#b) aggregating EVTYPE wrt fatalities

# Sum FATALITIES per event type, rank the event types from most to fewest
# fatalities, and keep the 20 highest-ranked rows.
total_fatalities <- arrange(
  aggregate(FATALITIES ~ EVTYPE, data = dataset, FUN = sum),
  desc(FATALITIES)
)[seq_len(20), ]
total_fatalities
##                     EVTYPE FATALITIES
## 1                  TORNADO       5633
## 2           EXCESSIVE HEAT       1903
## 3              FLASH FLOOD        978
## 4                     HEAT        937
## 5                LIGHTNING        816
## 6                TSTM WIND        504
## 7                    FLOOD        470
## 8              RIP CURRENT        368
## 9                HIGH WIND        248
## 10               AVALANCHE        224
## 11            WINTER STORM        206
## 12            RIP CURRENTS        204
## 13               HEAT WAVE        172
## 14            EXTREME COLD        160
## 15       THUNDERSTORM WIND        133
## 16              HEAVY SNOW        127
## 17 EXTREME COLD/WIND CHILL        125
## 18             STRONG WIND        103
## 19                BLIZZARD        101
## 20               HIGH SURF        101

#c) plotting

# Side-by-side bar charts of the ten event types with the most fatalities and
# the most injuries. Both tables are already sorted in descending order, so the
# first ten rows are the top ten; the original plotted all 20 rows under a
# "Top 10" title and left the second panel of the 1x2 layout empty.
fatalities_top10 <- head(total_fatalities, 10)
injuries_top10 <- head(total_injuries, 10)

# Keep the previous graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(1, 2), mar = c(15, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(fatalities_top10$FATALITIES, las = 3,
        names.arg = fatalities_top10$EVTYPE,
        main = "Weather Events With\n The Top 10 Highest Fatalities",
        ylab = "Number of Fatalities", col = fatalities_top10$FATALITIES)
barplot(injuries_top10$INJURIES, las = 3,
        names.arg = injuries_top10$EVTYPE,
        main = "Weather Events With\n The Top 10 Highest Injuries",
        ylab = "Number of Injuries", col = injuries_top10$INJURIES)
par(old_par)

#OR ##########Creating double bar graphs

#d) merging both

# Join the two top-20 tables on EVTYPE (only event types present in both are
# kept), order them by combined fatalities + injuries, and reshape to long
# format: one row per (event type, harm type) pair.
totals <- arrange(
  merge(total_fatalities, total_injuries, by = "EVTYPE"),
  desc(FATALITIES + INJURIES)
)
bad_stuff <- melt(totals, id.vars = "EVTYPE", variable.name = "bad_thing")
tail(bad_stuff, n = 5)
##               EVTYPE bad_thing value
## 20 THUNDERSTORM WIND  INJURIES  1488
## 21      WINTER STORM  INJURIES  1321
## 22         HIGH WIND  INJURIES  1137
## 23        HEAVY SNOW  INJURIES  1021
## 24          BLIZZARD  INJURIES   805

#e) plotting

# Create chart
# Grouped bar chart of fatalities and injuries per event type:
#   - bars are ordered by descending value and dodged by harm type,
#   - x-axis tick labels are rotated 45 degrees,
#   - the title is centered.
healthChart <- ggplot(bad_stuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(aes(fill = bad_thing), stat = "identity", position = "dodge") +
  xlab("Event Type") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Top 10 US Killers") +
  theme(plot.title = element_text(hjust = 0.5))
healthChart

#2. Across the United States, which types of events have the greatest economic consequences?

#We have property damage and crop damage.

#a) Aggregate Data for Property Damage

# Total the PROPDMG column per event type and sort from largest to smallest.
# NOTE(review): the PROPDMGEXP column is not used here, so these are sums of
# the raw PROPDMG values -- confirm whether the exponent should be applied.
propdmg <- aggregate(PROPDMG ~ EVTYPE, dataset, sum)
propdmg <- propdmg[order(propdmg[["PROPDMG"]], decreasing = TRUE), ]
# The ten event types with the largest summed PROPDMG
propdmgMax <- propdmg[seq_len(10), ]
print(propdmgMax)
##                 EVTYPE   PROPDMG
## 834            TORNADO 3212258.2
## 153        FLASH FLOOD 1420124.6
## 856          TSTM WIND 1335965.6
## 170              FLOOD  899938.5
## 760  THUNDERSTORM WIND  876844.2
## 244               HAIL  688693.4
## 464          LIGHTNING  603351.8
## 786 THUNDERSTORM WINDS  446293.2
## 359          HIGH WIND  324731.6
## 972       WINTER STORM  132720.6

#b) Aggregate Data for Crop Damage

# Total the CROPDMG column per event type and sort from largest to smallest.
# NOTE(review): the CROPDMGEXP column is not used here, so these are sums of
# the raw CROPDMG values -- confirm whether the exponent should be applied.
cropdmg <- aggregate(CROPDMG ~ EVTYPE, dataset, sum)
cropdmg <- cropdmg[order(cropdmg[["CROPDMG"]], decreasing = TRUE), ]
# The ten event types with the largest summed CROPDMG
cropdmgMax <- cropdmg[seq_len(10), ]
print(cropdmgMax)
##                 EVTYPE   CROPDMG
## 244               HAIL 579596.28
## 153        FLASH FLOOD 179200.46
## 170              FLOOD 168037.88
## 856          TSTM WIND 109202.60
## 834            TORNADO 100018.52
## 760  THUNDERSTORM WIND  66791.45
## 95             DROUGHT  33898.62
## 786 THUNDERSTORM WINDS  18684.93
## 359          HIGH WIND  17283.21
## 290         HEAVY RAIN  11122.80

#c) plotting

# Side-by-side bar charts of the ten event types with the largest summed
# property damage (left) and crop damage (right). The y-axis labels previously
# read "Number of Injuries", which does not describe the plotted PROPDMG and
# CROPDMG sums.
# Keep the previous graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(1, 2), mar = c(15, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(propdmgMax$PROPDMG, las = 3, names.arg = propdmgMax$EVTYPE,
        main = "Top 10 Events with\n Greatest Property Damages",
        ylab = "Property Damage (summed PROPDMG)", col = propdmgMax$PROPDMG)
barplot(cropdmgMax$CROPDMG, las = 3, names.arg = cropdmgMax$EVTYPE,
        main = "Top 10 Events with\n Greatest Crop Damages",
        ylab = "Crop Damage (summed CROPDMG)", col = cropdmgMax$CROPDMG)
par(old_par)

#Making a double bar graph

#d)merging both

# Join the two top-10 damage tables on EVTYPE (only event types present in
# both are kept), order them by combined PROPDMG + CROPDMG, and reshape to long
# format: one row per (event type, damage type) pair.
totalDamage <- arrange(
  merge(propdmgMax, cropdmgMax, by = "EVTYPE"),
  desc(PROPDMG + CROPDMG)
)
top_10_damages <- melt(totalDamage, id.vars = "EVTYPE",
                       variable.name = "Damage_Types")
head(top_10_damages, n = 5)
##        EVTYPE Damage_Types     value
## 1     TORNADO      PROPDMG 3212258.2
## 2 FLASH FLOOD      PROPDMG 1420124.6
## 3   TSTM WIND      PROPDMG 1335965.6
## 4        HAIL      PROPDMG  688693.4
## 5       FLOOD      PROPDMG  899938.5

#e) plotting

# Create chart
# Grouped bar chart of property and crop damage per event type:
#   - bars are ordered by descending value and dodged by damage type,
#   - x-axis tick labels are rotated 45 degrees,
#   - the title is centered.
# The plotted quantity is the summed PROPDMG/CROPDMG value divided by 100000;
# no exponent column is applied upstream, so the axis label no longer claims
# the values are billions of dollars.
DamageChart <- ggplot(
  top_10_damages,
  aes(x = reorder(EVTYPE, -value / 100000), y = value / 100000)
) +
  geom_bar(aes(fill = Damage_Types), stat = "identity", position = "dodge") +
  xlab("Event Type") +
  ylab("Summed damage values (x 100,000, DMGEXP not applied)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Top 10 greatest economic consequences") +
  theme(plot.title = element_text(hjust = 0.5))
DamageChart

#Making a triple bar graph

#f)merging both and melting

#merging both
# Join the two top-10 damage tables on EVTYPE, add a TOTALDMG column holding
# PROPDMG + CROPDMG, order by that total, and reshape to long format so each
# event type has one row per damage measure (PROPDMG, CROPDMG, TOTALDMG).
totalDamage <- merge(propdmgMax, cropdmgMax, by = "EVTYPE")
totalDamage[["TOTALDMG"]] <- totalDamage[["PROPDMG"]] + totalDamage[["CROPDMG"]]
totalDamage <- arrange(totalDamage, desc(TOTALDMG))
top_10_damages <- melt(totalDamage, id.vars = "EVTYPE",
                       variable.name = "Damage_Types")
tail(top_10_damages, n = 5)
##                EVTYPE Damage_Types     value
## 20               HAIL     TOTALDMG 1268289.7
## 21              FLOOD     TOTALDMG 1067976.4
## 22  THUNDERSTORM WIND     TOTALDMG  943635.6
## 23 THUNDERSTORM WINDS     TOTALDMG  464978.1
## 24          HIGH WIND     TOTALDMG  342014.8

#g) plotting

# Create chart
# Grouped bar chart of property, crop and total damage per event type:
#   - bars are ordered by descending value and dodged by damage measure,
#   - x-axis tick labels are rotated 45 degrees,
#   - the title is centered.
# The fill mapping lives in geom_bar()'s aes(); the extra `fill = Damage_Types`
# that was passed to ggplot() outside aes() has been dropped.
# The plotted quantity is the summed damage value divided by 1000; no exponent
# column is applied upstream, so the axis label no longer claims the values are
# billions of dollars.
DamageChart <- ggplot(
  top_10_damages,
  aes(x = reorder(EVTYPE, -value / 1000), y = value / 1000)
) +
  geom_bar(aes(fill = Damage_Types), stat = "identity", position = "dodge") +
  xlab("Event Type") +
  ylab("Summed damage values (x 1,000, DMGEXP not applied)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Top 10 greatest economic consequences") +
  theme(plot.title = element_text(hjust = 0.5))
DamageChart

# Assignment
#
# The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events. The NOAA Storm Database is used to address the questions below, and the code for the entire analysis must be shown. The analysis can consist of tables, figures, or other summaries produced with R to support the analysis.

# Synopsis
#
# The NOAA database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. This project looks at how severe weather events can cause both public health and economic problems.

# Questions: This data analysis must address the following questions:
#
#   • Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
#   • Question 2: Across the United States, which types of events have the greatest economic consequences?

# Results: Based on analysis of the aggregate human and economic impacts seen in the data by storm event type:
#
#   • Findings on Question 1: Tornadoes cause by far the most fatalities and injuries, and therefore the most significant harm to public health.
#     • Excessive heat and flash flooding are the second and third leading causes of fatalities.
#     • Tornadoes cause by far the most human injuries. Thunderstorm-related events (wind, lightning, flooding) combine to be a significant injury contributor as well.
#   • Findings on Question 2: Floods cause the most significant total damage (property and crop).
#     • While property damage is most significant with flooding, hurricanes/typhoons, tornadoes, and storm surge are also strong contributors to property damage.
#     • Crop damage occurs with flooding, but drought events are the most significant cause of economic harm to crops. For crops, flooding, ice storms, and hail are also noted contributors to economic harm.