Synopsis

I use the US National Weather Services’ Storm Data to analyse what natural disaster causes the most casualty and what causes the most economic damage. The data started in the early 1950s but the most comprehensive event classification did not commence until 1996. Therefore, data from 1996 until 2011 is used. The analysis shows that during the study period, heat caused the most fatalities while tornado caused the most injuries. For economic consequences, hurricane ranked the highest in terms of total as well as property damage while drought caused the most crop damage.

Data Processing

Data is downloaded from a bzip2 file.

    data<- read.csv("repdata-data-StormData.csv.bz2")

I explore the range of information in each of the relevant data fields.

    summary(data$BGN_DATE)
##  5/25/2011 0:00:00  4/27/2011 0:00:00   6/9/2011 0:00:00 
##               1202               1193               1030 
##  5/30/2004 0:00:00   4/4/2011 0:00:00   4/2/2006 0:00:00 
##               1016               1009                981 
##   4/7/2006 0:00:00  4/19/2011 0:00:00  5/31/1998 0:00:00 
##                973                970                933 
##  5/22/2011 0:00:00   6/4/2008 0:00:00  5/21/2004 0:00:00 
##                919                900                899 
##  5/26/2011 0:00:00  3/12/2006 0:00:00  6/21/2011 0:00:00 
##                885                883                876 
##   5/6/2003 0:00:00   4/3/2007 0:00:00  5/24/2004 0:00:00 
##                830                801                798 
##  6/10/2008 0:00:00  5/25/2008 0:00:00  5/18/1995 0:00:00 
##                782                779                766 
##   6/8/2008 0:00:00 10/26/2010 0:00:00  6/15/2009 0:00:00 
##                766                761                754 
##  2/11/2009 0:00:00 10/24/2001 0:00:00  4/26/2011 0:00:00 
##                739                738                736 
##   6/1/2011 0:00:00  5/10/2003 0:00:00  6/22/2006 0:00:00 
##                729                728                726 
##  1/29/2008 0:00:00  5/24/2011 0:00:00   6/6/2005 0:00:00 
##                721                720                718 
##   6/2/2004 0:00:00  4/10/2009 0:00:00  7/22/2008 0:00:00 
##                708                704                698 
##  6/18/1998 0:00:00  6/18/2011 0:00:00   6/5/2008 0:00:00 
##                695                693                689 
##  5/23/2011 0:00:00  5/25/2006 0:00:00  7/21/2003 0:00:00 
##                681                680                680 
##   8/1/2011 0:00:00  4/28/2002 0:00:00  4/18/2002 0:00:00 
##                670                669                665 
## 11/10/2002 0:00:00  6/20/2011 0:00:00   8/2/2008 0:00:00 
##                657                657                654 
##  6/16/2008 0:00:00  4/22/2005 0:00:00   6/4/2002 0:00:00 
##                649                646                639 
##  6/11/2003 0:00:00  6/15/2008 0:00:00  7/11/2011 0:00:00 
##                637                637                637 
##  4/25/2011 0:00:00  5/24/2010 0:00:00  6/18/2009 0:00:00 
##                636                631                628 
##  5/17/1999 0:00:00  4/15/2011 0:00:00  6/22/2008 0:00:00 
##                623                622                620 
##   5/4/2003 0:00:00  6/29/1998 0:00:00   7/4/2003 0:00:00 
##                615                614                611 
##   6/8/2011 0:00:00   6/1/2008 0:00:00   2/5/2008 0:00:00 
##                609                608                602 
##  7/20/2000 0:00:00  6/12/2004 0:00:00  6/17/2009 0:00:00 
##                601                596                596 
##   4/5/2011 0:00:00  5/15/1998 0:00:00  6/19/2007 0:00:00 
##                595                592                592 
##  5/18/2000 0:00:00   4/9/2011 0:00:00  6/26/2008 0:00:00 
##                589                588                588 
##   8/5/2010 0:00:00  6/14/2001 0:00:00   3/9/2006 0:00:00 
##                588                584                583 
##   6/4/2005 0:00:00  5/11/2011 0:00:00   5/2/2003 0:00:00 
##                583                580                580 
##   7/9/2003 0:00:00  6/12/2001 0:00:00  6/18/2010 0:00:00 
##                579                578                577 
##  6/26/2009 0:00:00 11/11/1995 0:00:00  6/15/2010 0:00:00 
##                573                572                567 
##  5/27/2001 0:00:00   7/5/2004 0:00:00  6/21/2006 0:00:00 
##                565                564                561 
##   6/5/2005 0:00:00   4/8/1998 0:00:00  8/18/2011 0:00:00 
##                561                559                556 
##  8/28/2011 0:00:00   7/1/2011 0:00:00   5/8/2009 0:00:00 
##                556                552                551 
##  4/26/1994 0:00:00  7/13/2004 0:00:00  6/26/2011 0:00:00 
##                550                550                549 
##            (Other) 
##             833617
    summary(data$PROPDMG)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00   12.06    0.50 5000.00
    summary(data$PROPDMGEXP)
##             -      ?      +      0      1      2      3      4      5 
## 465934      1      8      5    216     25     13      4      4     28 
##      6      7      8      B      h      H      K      m      M 
##      4      5      1     40      1      6 424665      7  11330
    summary(data$CROPDMG)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   1.527   0.000 990.000
    summary(data$CROPDMGEXP)    
##             ?      0      2      B      k      K      m      M 
## 618413      7     19      1      9     21 281832      1   1994

The data is then reformatted. First only the year information is extracted. Second, only data from 1996 onward is used. Third the abbreviation for unit representing the dollar amount of loss is converted into number. If the abbreviation is not one of the recognized ones, it will be set to 0.

    year <- data$BGN_DATE
    year <- gsub(" 0:00:00", "", year)
    year <- gsub("[0-9]+/[0-9]+/", "", year)
    year <- as.numeric(year)

    data$year <- year
    recent <- data[data$year >= 1996,]

    recent$PROPDMGEXP <- gsub("[^BbMmKkHh]", "0", recent$PROPDMGEXP )
    recent$PROPDMGEXP <- gsub("[Bb]", "1000000000", recent$PROPDMGEXP )
    recent$PROPDMGEXP <- gsub("[Mm]", "1000000", recent$PROPDMGEXP )
    recent$PROPDMGEXP <- gsub("[Kk]", "1000", recent$PROPDMGEXP )
    recent$PROPDMGEXP <- gsub("[Hh]", "100", recent$PROPDMGEXP )
    recent$CROPDMGEXP <- gsub("[^BbMmKkHh]", "0", recent$CROPDMGEXP )
    recent$CROPDMGEXP <- gsub("[Bb]", "1000000000", recent$CROPDMGEXP )
    recent$CROPDMGEXP <- gsub("[Mm]", "1000000", recent$CROPDMGEXP )
    recent$CROPDMGEXP <- gsub("[Kk]", "1000", recent$CROPDMGEXP )   
    recent$CROPDMGEXP <- gsub("[Hh]", "100", recent$CROPDMGEXP )   
    recent$PROPDMGEXP <- as.numeric(recent$PROPDMGEXP)
    recent$CROPDMGEXP <- as.numeric(recent$CROPDMGEXP)
    recent$PROPDMG <- recent$PROPDMG*recent$PROPDMGEXP
    recent$CROPDMG <- recent$CROPDMG*recent$CROPDMGEXP
    recent[is.na(recent)] <-0

I check the range of data in each of the relevant data fields to look for anomaly.

    summary(recent)
##     STATE__                   BGN_DATE             BGN_TIME     
##  Min.   : 1.00   5/25/2011 0:00:00:  1202   12:00:00 AM: 10163  
##  1st Qu.:19.00   4/27/2011 0:00:00:  1193   06:00:00 PM:  7350  
##  Median :30.00   6/9/2011 0:00:00 :  1030   04:00:00 PM:  7261  
##  Mean   :31.36   5/30/2004 0:00:00:  1016   05:00:00 PM:  6891  
##  3rd Qu.:45.00   4/4/2011 0:00:00 :  1009   12:00:00 PM:  6703  
##  Max.   :95.00   4/2/2006 0:00:00 :   981   03:00:00 PM:  6700  
##                  (Other)          :647099   (Other)    :608462  
##    TIME_ZONE          COUNTY            COUNTYNAME         STATE       
##  CST    :334329   Min.   :  1.00   WASHINGTON:  5311   TX     : 51335  
##  EST    :224876   1st Qu.: 29.00   JEFFERSON :  5112   KS     : 38649  
##  MST    : 63241   Median : 73.00   FRANKLIN  :  4374   OK     : 26980  
##  PST    : 20821   Mean   : 99.89   JACKSON   :  4352   MO     : 25802  
##  AST    :  5682   3rd Qu.:129.00   LINCOLN   :  4086   IA     : 22712  
##  HST    :  2491   Max.   :873.00   MADISON   :  3929   IL     : 21215  
##  (Other):  2090                    (Other)   :626366   (Other):466837  
##                EVTYPE         BGN_RANGE          BGN_AZI      
##  HAIL             :207715   Min.   :  0.000          :313456  
##  TSTM WIND        :128662   1st Qu.:  0.000   N      : 84182  
##  THUNDERSTORM WIND: 81402   Median :  0.000   W      : 36145  
##  FLASH FLOOD      : 50999   Mean   :  1.911   S      : 35052  
##  FLOOD            : 24247   3rd Qu.:  2.000   E      : 31224  
##  TORNADO          : 23154   Max.   :520.000   NW     : 22817  
##  (Other)          :137351                     (Other):130654  
##          BGN_LOCATI                  END_DATE             END_TIME     
##               : 90157   4/27/2011 0:00:00:  1214   06:00:00 PM:  9802  
##  COUNTYWIDE   : 19680   5/25/2011 0:00:00:  1196   05:00:00 PM:  8314  
##  SPRINGFIELD  :   843   6/9/2011 0:00:00 :  1021   04:00:00 PM:  8104  
##  SOUTH PORTION:   810   4/4/2011 0:00:00 :  1007   12:00:00 PM:  7483  
##  NORTH PORTION:   784   5/30/2004 0:00:00:   998   11:59:00 PM:  7184  
##  COLUMBIA     :   757   4/7/2006 0:00:00 :   993   03:00:00 PM:  7128  
##  (Other)      :540499   (Other)          :647101   (Other)    :605515  
##    COUNTY_END   COUNTYENDN   END_RANGE          END_AZI      
##  Min.   :0    Min.   :0    Min.   :  0.000          :477051  
##  1st Qu.:0    1st Qu.:0    1st Qu.:  0.000   N      : 27957  
##  Median :0    Median :0    Median :  0.000   S      : 22393  
##  Mean   :0    Mean   :0    Mean   :  1.341   W      : 20015  
##  3rd Qu.:0    3rd Qu.:0    3rd Qu.:  1.000   E      : 19921  
##  Max.   :0    Max.   :0    Max.   :176.000   NE     : 14498  
##                                              (Other): 71695  
##            END_LOCATI         LENGTH             WIDTH         
##                 :253369   Min.   :  0.0000   Min.   :   0.000  
##  COUNTYWIDE     : 19731   1st Qu.:  0.0000   1st Qu.:   0.000  
##  SOUTH PORTION  :   833   Median :  0.0000   Median :   0.000  
##  NORTH PORTION  :   780   Mean   :  0.1014   Mean   :   4.633  
##  CENTRAL PORTION:   617   3rd Qu.:  0.0000   3rd Qu.:   0.000  
##  SPRINGFIELD    :   575   Max.   :400.0000   Max.   :4400.000  
##  (Other)        :377625                                        
##        F                MAG            FATALITIES       
##  Min.   :0.00000   Min.   :    0.0   Min.   :  0.00000  
##  1st Qu.:0.00000   1st Qu.:    0.0   1st Qu.:  0.00000  
##  Median :0.00000   Median :   50.0   Median :  0.00000  
##  Mean   :0.02058   Mean   :   45.3   Mean   :  0.01336  
##  3rd Qu.:0.00000   3rd Qu.:   75.0   3rd Qu.:  0.00000  
##  Max.   :5.00000   Max.   :22000.0   Max.   :158.00000  
##                                                         
##     INJURIES           PROPDMG            PROPDMGEXP       
##  Min.   :0.00e+00   Min.   :0.000e+00   Min.   :0.000e+00  
##  1st Qu.:0.00e+00   1st Qu.:0.000e+00   1st Qu.:0.000e+00  
##  Median :0.00e+00   Median :0.000e+00   Median :1.000e+03  
##  Mean   :8.87e-02   Mean   :5.612e+05   Mean   :6.081e+04  
##  3rd Qu.:0.00e+00   3rd Qu.:1.250e+03   3rd Qu.:1.000e+03  
##  Max.   :1.15e+03   Max.   :1.150e+11   Max.   :1.000e+09  
##                                                            
##     CROPDMG            CROPDMGEXP             WFO        
##  Min.   :0.000e+00   Min.   :0.000e+00   OUN    : 17393  
##  1st Qu.:0.000e+00   1st Qu.:0.000e+00   LWX    : 13174  
##  Median :0.000e+00   Median :0.000e+00   PHI    : 12551  
##  Mean   :5.318e+04   Mean   :9.257e+03   TSA    : 12483  
##  3rd Qu.:0.000e+00   3rd Qu.:1.000e+03   JAN    : 12446  
##  Max.   :1.510e+09   Max.   :1.000e+09   FWD    : 12193  
##                                          (Other):573290  
##                                STATEOFFIC    
##  TEXAS, North                       : 12193  
##  ARKANSAS, Central and North Central: 11738  
##  IOWA, Central                      : 11345  
##  KANSAS, Southwest                  : 11212  
##  GEORGIA, North and Central         : 11120  
##  KANSAS, Southeast                  : 10960  
##  (Other)                            :584962  
##                                                                                                                                                                                                     ZONENAMES     
##                                                                                                                                                                                                          :350127  
##                                                                                                                                                                                                          :205988  
##  GREATER RENO / CARSON CITY / M - GREATER RENO / CARSON CITY / M                                                                                                                                         :   624  
##  GREATER LAKE TAHOE AREA - GREATER LAKE TAHOE AREA                                                                                                                                                       :   591  
##  MADISON - MADISON                                                                                                                                                                                       :   298  
##  MONO - MONO                                                                                                                                                                                             :   294  
##  (Other)                                                                                                                                                                                                 : 95608  
##     LATITUDE      LONGITUDE        LATITUDE_E     LONGITUDE_    
##  Min.   :   0   Min.   :-14451   Min.   :   0   Min.   :-14455  
##  1st Qu.:2804   1st Qu.:  7237   1st Qu.:   0   1st Qu.:     0  
##  Median :3557   Median :  8634   Median :2923   Median :  7412  
##  Mean   :2886   Mean   :  6923   Mean   :1937   Mean   :  4681  
##  3rd Qu.:4024   3rd Qu.:  9548   3rd Qu.:3803   3rd Qu.:  9209  
##  Max.   :9706   Max.   : 17124   Max.   :9706   Max.   :106220  
##                                                                 
##                                            REMARKS           REFNUM      
##                                                : 99842   Min.   :248768  
##  Trees down.\n                                 :  1110   1st Qu.:412150  
##  Several trees were blown down.\n              :   568   Median :575533  
##  Trees were downed.\n                          :   446   Mean   :575533  
##  Large trees and power lines were blown down.\n:   432   3rd Qu.:738915  
##  A few trees were blown down.\n                :   398   Max.   :902297  
##  (Other)                                       :550734                   
##       year     
##  Min.   :1996  
##  1st Qu.:2000  
##  Median :2005  
##  Mean   :2004  
##  3rd Qu.:2008  
##  Max.   :2011  
## 

The summary on “recent” data frame shows an unusually large property damage at $110 billion. Further investigation is needed.

    recent[which(recent$PROPDMG == max(recent$PROPDMG)),]
##        STATE__         BGN_DATE    BGN_TIME TIME_ZONE COUNTY COUNTYNAME
## 605953       6 1/1/2006 0:00:00 12:00:00 AM       PST     55       NAPA
##        STATE EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI         END_DATE
## 605953    CA  FLOOD         0         COUNTYWIDE 1/1/2006 0:00:00
##           END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI END_LOCATI
## 605953 07:00:00 AM          0          0         0         COUNTYWIDE
##        LENGTH WIDTH F MAG FATALITIES INJURIES  PROPDMG PROPDMGEXP  CROPDMG
## 605953      0     0 0   0          0        0 1.15e+11      1e+09 32500000
##        CROPDMGEXP WFO          STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 605953      1e+06 MTR CALIFORNIA, Western               3828     12218
##        LATITUDE_E LONGITUDE_
## 605953       3828      12218
##                                                                                                                                                                                                                                                                                                                                                                                               REMARKS
## 605953 Major flooding continued into the early hours of January 1st, before the Napa River finally fell below flood stage and the water receeded. Flooding was severe in Downtown Napa from the Napa Creek and the City and Parks Department was hit with $6 million in damage alone. The City of Napa had 600 homes with moderate damage, 150 damaged businesses with costs of at least $70 million.
##        REFNUM year
## 605953 605943 2006

The search finds the record with original row number 605953. Its PRORPDMGEXP field is B, denoting billion. Next, we look at the original record.

    data[which(row.names(data) == 605953),]
##        STATE__         BGN_DATE    BGN_TIME TIME_ZONE COUNTY COUNTYNAME
## 605953       6 1/1/2006 0:00:00 12:00:00 AM       PST     55       NAPA
##        STATE EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI         END_DATE
## 605953    CA  FLOOD         0         COUNTYWIDE 1/1/2006 0:00:00
##           END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI END_LOCATI
## 605953 07:00:00 AM          0         NA         0         COUNTYWIDE
##        LENGTH WIDTH  F MAG FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 605953      0     0 NA   0          0        0     115          B    32.5
##        CROPDMGEXP WFO          STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 605953          M MTR CALIFORNIA, Western               3828     12218
##        LATITUDE_E LONGITUDE_
## 605953       3828      12218
##                                                                                                                                                                                                                                                                                                                                                                                               REMARKS
## 605953 Major flooding continued into the early hours of January 1st, before the Napa River finally fell below flood stage and the water receeded. Flooding was severe in Downtown Napa from the Napa Creek and the City and Parks Department was hit with $6 million in damage alone. The City of Napa had 600 homes with moderate damage, 150 damaged businesses with costs of at least $70 million.
##        REFNUM year
## 605953 605943 2006

The result shows that in the original entry, PROPDMG is 115. Using the remarks column, I determine that the correct PROPDMGEXP notation is likely “M.” Corrections are made as follows:

    recent[which(row.names(recent) == 605953),26] <-1e+06
    recent[which(row.names(recent) == 605953),25] <-115*1e+06  

The injury and fatality count and the property and crop damage are then aggregated by event types.

    injury <- aggregate(recent$INJURIES, by =list(recent$EVTYPE), FUN=sum)
    fatal <- aggregate(recent$FATALITIES, by =list(recent$EVTYPE), FUN=sum)
    propdmg <- aggregate(recent$PROPDMG, by =list(recent$EVTYPE), FUN=sum)
    cropdmg<- aggregate(recent$CROPDMG, by =list(recent$EVTYPE), FUN=sum)   
    
    # rename labels
    names(injury) <- c("Event_Type", "Injuries")
    names(fatal) <- c("Event_Type", "Fatalities")
    names(propdmg) <- c("Event_Type", "Propdamages")
    names(cropdmg) <- c("Event_Type", "Cropdamages")
   
    # merge columns, change all words to lower case
    # then take out events that have no casualty, then sort the rest
    total <- merge(injury, fatal ,by="Event_Type")
    total$Event_Type <- tolower(total$Event_Type)
    cas <- total[which(total$Injuries != 0 | total$Fatalities != 0),] 
    cas_sorted <- cas[order(cas$Fatalities, cas$Injuries),] 

    # merge columns, change all words to lower case
    # then take out events that have no damages, then sort the rest
    total_dmg <- merge(propdmg, cropdmg ,by="Event_Type")
    total_dmg$Event_Type <- tolower(total_dmg$Event_Type)
    dmg <- total_dmg[which(total_dmg$Propdamages != 0.00 | total_dmg$Cropdamages != 0.00),] 
    dmg_sorted <- dmg[order(dmg$Propdamages, dmg$Cropdamages),] 
    
    # create an empty data.frame for consolidated event types 
    options(stringsAsFactors= FALSE)
    cas_con <- data.frame(Event_Type = character(), Injuries = numeric(), Fatalities = numeric())
    dmg_con <- data.frame(Event_Type = character(), Propdamages = numeric(), Cropdamages = numeric())

The event types are consolidated based on similarities. Regular expression is used to extract event types which are similar in my opinion. For quesiton 1:

   i <- 1 #counter    
    heat <-grep("heat|warm|hyperthermia", cas_sorted$Event_Type)
    heat_rows <- cas_sorted[heat,] 
    cas_con[i,]<- c("heat", sum(heat_rows$Injuries),sum(heat_rows$Fatalities))
    cas_sorted <- cas_sorted[-heat,]
    
    i <- i+1
    tornado <-grep("tornado|waterspout|funnel", cas_sorted$Event_Type)
    tornado_rows <- cas_sorted[tornado,] 
    cas_con[i,]<- c("tornado", sum(tornado_rows$Injuries),sum(tornado_rows$Fatalities))
    cas_sorted <- cas_sorted[-tornado,]

    i <- i+1
    flood <-grep("flood|stream fld", cas_sorted$Event_Type)
    flood_rows <- cas_sorted[flood,] 
    cas_con[i,]<- c("flood", sum(flood_rows$Injuries),sum(flood_rows$Fatalities))
    cas_sorted <- cas_sorted[-flood,]

    i <- i+1
    lightning <-grep("lightning", cas_sorted$Event_Type)
    lightning_rows <- cas_sorted[lightning,] 
    cas_con[i,]<- c("lightning", sum(lightning_rows$Injuries),sum(lightning_rows$Fatalities))
    cas_sorted <- cas_sorted[-lightning,]

    i <- i+1
    current <-grep("current|surge|tide|surf|sea|wave|water|marine|drowning|swells", cas_sorted$Event_Type)
    current_rows <- cas_sorted[current,] 
    cas_con[i,]<- c("current", sum(current_rows$Injuries),sum(current_rows$Fatalities))
    cas_sorted <- cas_sorted[-current,]
 
    i <- i+1
    wind <-grep("wind", cas_sorted$Event_Type) 
    notwind <- grep("chill", cas_sorted$Event_Type)
    wind <- setdiff(wind, notwind)
    wind_rows <- cas_sorted[wind,]
    cas_con[i,]<- c("wind", sum(wind_rows$Injuries),sum(wind_rows$Fatalities))
    cas_sorted <- cas_sorted[-wind,]

    i <- i+1
    icy <-grep("snow|ice|blizzard|winter|wintry|icy|frost|glaze|freezing", cas_sorted$Event_Type)
    icy_rows <- cas_sorted[icy,]
    cas_con[i,]<- c("snow_ice_winter", sum(icy_rows$Injuries),sum(icy_rows$Fatalities))
    cas_sorted <- cas_sorted[-icy,]

    i <- i+1
    cold <-grep("cold|chill|hypothermia", cas_sorted$Event_Type)
    cold_rows <- cas_sorted[cold,]
    cas_con[i,]<- c("cold", sum(cold_rows$Injuries),sum(cold_rows$Fatalities))
    cas_sorted <- cas_sorted[-cold,]

    i <- i+1
    hurr <-grep("hurricane|tropical|typhoon", cas_sorted$Event_Type)
    hurr_rows <- cas_sorted[hurr,]
    cas_con[i,]<- c("hurricane", sum(hurr_rows$Injuries),sum(hurr_rows$Fatalities))
    cas_sorted <- cas_sorted[-hurr,]

    i <- i+1
    fire <-grep("fire", cas_sorted$Event_Type)
    fire_rows <- cas_sorted[fire,]
    cas_con[i,]<- c("fire", sum(fire_rows$Injuries),sum(fire_rows$Fatalities))
    cas_sorted <- cas_sorted[-fire,]

    i <- i+1
    fog <-grep("fog", cas_sorted$Event_Type)
    fog_rows <- cas_sorted[fog,]
    cas_con[i,]<- c("fog", sum(fog_rows$Injuries),sum(fog_rows$Fatalities))
    cas_sorted <- cas_sorted[-fog,]

    i <- i+1
    slide <-grep("slide", cas_sorted$Event_Type)
    slide_rows <- cas_sorted[slide,]
    cas_con[i,]<- c("slide", sum(slide_rows$Injuries),sum(slide_rows$Fatalities))
    cas_sorted <- cas_sorted[-slide,]

    i <- i+1
    rain <-grep("rain|recip|drizzle", cas_sorted$Event_Type)
    rain_rows <- cas_sorted[rain,]
    cas_con[i,]<- c("rain", sum(rain_rows$Injuries),sum(rain_rows$Fatalities))
    cas_sorted <- cas_sorted[-rain,]

    i <- i+1
    hail <-grep("hail", cas_sorted$Event_Type)
    hail_rows <- cas_sorted[hail,]
    cas_con[i,]<- c("hail", sum(hail_rows$Injuries),sum(hail_rows$Fatalities))
    cas_sorted <- cas_sorted[-hail,]
        
    i <- i+1
    storm <-grep("storm|dust|microburst", cas_sorted$Event_Type)
    storm_rows <- cas_sorted[storm,]
    cas_con[i,]<- c("storm", sum(storm_rows$Injuries),sum(storm_rows$Fatalities))
    cas_sorted <- cas_sorted[-storm,]

For question 2, the same consolidation is done.

    j <- 1 #counter    
    wind <-grep("wind", dmg_sorted$Event_Type) 
    notwind <- grep("chill", dmg_sorted$Event_Type)
    wind <- setdiff(wind, notwind)
    wind_rows <- dmg_sorted[wind,]
    dmg_con[j,]<- c("wind", sum(wind_rows$Propdamages),sum(wind_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-wind,]

    j <- j+1
    flood <-grep("flood|stream fld", dmg_sorted$Event_Type)
    flood_rows <- dmg_sorted[flood,] 
    dmg_con[j,]<- c("flood", sum(flood_rows$Propdamages),sum(flood_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-flood,]

    j <- j+1
    tornado <-grep("tornado|waterspout|funnel|landspout", dmg_sorted$Event_Type)
    tornado_rows <- dmg_sorted[tornado,] 
    dmg_con[j,]<- c("tornado", sum(tornado_rows$Propdamages),sum(tornado_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-tornado,]

    j <- j+1
    hail <-grep("hail", dmg_sorted$Event_Type)
    hail_rows <- dmg_sorted[hail,]
    dmg_con[j,]<- c("hail", sum(hail_rows$Propdamages),sum(hail_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-hail,]

    j <- j+1
    lightning <-grep("lightning", dmg_sorted$Event_Type)
    lightning_rows <- dmg_sorted[lightning,] 
    dmg_con[j,]<- c("lightning", sum(lightning_rows$Propdamages),sum(lightning_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-lightning,]

    j <- j+1
    icy <-grep("snow|ice|blizzard|winter|wintry|icy|frost|glaze|freezing", dmg_sorted$Event_Type)
    icy_rows <- dmg_sorted[icy,]
    dmg_con[j,]<- c("snow_ice_winter", sum(icy_rows$Propdamages),sum(icy_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-icy,]

    j <- j+1
    fire <-grep("fire", dmg_sorted$Event_Type)
    fire_rows <- dmg_sorted[fire,]
    dmg_con[j,]<- c("fire", sum(fire_rows$Propdamages),sum(fire_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-fire,]

    j <- j+1
    hurr <-grep("hurricane|tropical|typhoon", dmg_sorted$Event_Type)
    hurr_rows <- dmg_sorted[hurr,]
    dmg_con[j,]<- c("hurricane", sum(hurr_rows$Propdamages),sum(hurr_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-hurr,]

    j <- j+1
    rain <-grep("rain|recip|drizzle", dmg_sorted$Event_Type)
    rain_rows <- dmg_sorted[rain,]
    dmg_con[j,]<- c("rain", sum(rain_rows$Propdamages),sum(rain_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-rain,]

    j <- j+1
    storm <-grep("storm|dust|microburst|downburst", dmg_sorted$Event_Type)
    storm_rows <- dmg_sorted[storm,]
    dmg_con[j,]<- c("storm", sum(storm_rows$Propdamages),sum(storm_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-storm,]

    j <- j+1
    slide <-grep("slide", dmg_sorted$Event_Type)
    slide_rows <- dmg_sorted[slide,]
    dmg_con[j,]<- c("slide", sum(slide_rows$Propdamages),sum(slide_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-slide,]

    j <- j+1
    fog <-grep("fog", dmg_sorted$Event_Type)
    fog_rows <- dmg_sorted[fog,]
    dmg_con[j,]<- c("fog", sum(fog_rows$Propdamages),sum(fog_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-fog,]

    j <- j+1
    cold <-grep("cold|chill|hypothermia|freeze", dmg_sorted$Event_Type)
    cold_rows <- dmg_sorted[cold,]
    dmg_con[j,]<- c("cold", sum(cold_rows$Propdamages),sum(cold_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-cold,]

    j <- j+1
    current <-grep("current|surge|tide|surf|sea|wave|water|marine|drowning|swells|seiche", dmg_sorted$Event_Type)
    current_rows <- dmg_sorted[current,] 
    dmg_con[j,]<- c("current", sum(current_rows$Propdamages),sum(current_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-current,]

    j <- j+1
    heat <-grep("heat|warm|hyperthermia", dmg_sorted$Event_Type)
    heat_rows <- dmg_sorted[heat,] 
    dmg_con[j,]<- c("heat", sum(heat_rows$Propdamages),sum(heat_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-heat,]

    j <- j+1
    other <-grep("other", dmg_sorted$Event_Type)
    other_rows <- dmg_sorted[other,] 
    dmg_con[j,]<- c("other", sum(other_rows$Propdamages),sum(other_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-other,]

    j <- j+1
    erosion <-grep("erosion", dmg_sorted$Event_Type)
    erosion_rows <- dmg_sorted[erosion,] 
    dmg_con[j,]<- c("erosion", sum(erosion_rows$Propdamages),sum(erosion_rows$Cropdamages))
    dmg_sorted <- dmg_sorted[-erosion,]

Results

    cas_con <- rbind(cas_con, cas_sorted)
    cas_con_fatal <-cas_con[order(-as.integer(cas_con$Fatalities)),] 
    row.names(cas_con_fatal) <- NULL
    print(cas_con_fatal)
##         Event_Type Injuries Fatalities
## 1             heat     7702       2037
## 2          tornado    20670       1513
## 3            flood     8520       1337
## 4          current      858        752
## 5             wind     6614        747
## 6        lightning     4141        651
## 7  snow_ice_winter     3585        552
## 8             cold      132        382
## 9        avalanche      156        223
## 10       hurricane     1666        182
## 11            rain      260         96
## 12            fire     1458         87
## 13             fog      855         69
## 14           slide       55         43
## 15         tsunami      129         33
## 16           storm      443         21
## 17            hail      723          7
## 18         drought        4          0
## 19           other        4          0

From the above table, we see that heat caused the most fatalities, followed by tornado and flood. As for injuries, tornado, flood and heat were the top contributors in that order during the study period.

    #create a new data.frame for drawing charts
    cas_con_chart <-cas_con
    cas_con_chart$Injuries <- as.numeric(cas_con$Injuries)
    cas_con_chart$Fatalities <- as.numeric(cas_con$Fatalities)
    cas_con_chart <-cas_con_chart[order(cas_con_chart$Fatalities),] 
    #draw a chart to show both injuries and casualties
    par(mai=c(2,2,2,2))
    barplot(t(as.matrix(cas_con_chart[, c("Injuries", "Fatalities")])), beside = TRUE, names.arg = cas_con_chart$Event_Type, main = "Injuries and Fatalities by Event Type (1996-2011)", xlab="Count", horiz=TRUE, las=1,col=c("red","blue"))
    legend("bottomright", c("Injuries", "Fatalities"), fill=c("red","blue"))

    #combine events that have been consolidated and the remaining ones, then sort
    dmg_con <- rbind(dmg_con, dmg_sorted)
    dmg_con$total <- as.numeric(dmg_con$Propdamages)+as.numeric(dmg_con$Cropdamages)
    dmg_con_total <-dmg_con[order(-as.numeric(dmg_con$total)),] 
    row.names(dmg_con_total) <- NULL

    #create a new data frame for drawing a barplot
    dmg_con_chart <-dmg_con_total
    dmg_con_chart$Propdamages <- as.numeric(dmg_con_total$Propdamages)
    dmg_con_chart$Cropdamages <- as.numeric(dmg_con_total$Cropdamages)  
    dmg_con_chart$total <- as.numeric(dmg_con_total$total)
    dmg_con_plot <-dmg_con_chart[,1:3]

     #draw a chart to show both injuries and casualties
     par(mai=c(2,2,2,2))
     barplot(t(as.matrix(dmg_con_plot[, c("Propdamages", "Cropdamages")])), xaxt="n", names.arg = dmg_con_plot$Event_Type, main = "Property and Crop Damage by Event Type (1996-2011)", xlab="Damage ($B)", horiz=TRUE, las=1, col=c("red","blue"))
     axis(side=1, at=c(0,5e10, 1e11, 1.5e11), labels=c(0, 50, 100, 150))
     legend("topright", c("Property Damage", "Crop Damage"),fill=c("red","blue"))

From the above chart, we can see that for economic damage, hurricane caused the most total and property damage but drought caused the most crop damage during the study period.