I use the US National Weather Services’ Storm Data to analyse what natural disaster causes the most casualty and what causes the most economic damage. The data started in the early 1950s but the most comprehensive event classification did not commence until 1996. Therefore, data from 1996 until 2011 is used. The analysis shows that during the study period, heat caused the most fatalities while tornado caused the most injuries. For economic consequences, hurricane ranked the highest in terms of total as well as property damage while drought caused the most crop damage.
Data is downloaded from a bzip2 file.
data<- read.csv("repdata-data-StormData.csv.bz2")
I explore the range of information in each of the relevant data fields.
summary(data$BGN_DATE)
## 5/25/2011 0:00:00 4/27/2011 0:00:00 6/9/2011 0:00:00
## 1202 1193 1030
## 5/30/2004 0:00:00 4/4/2011 0:00:00 4/2/2006 0:00:00
## 1016 1009 981
## 4/7/2006 0:00:00 4/19/2011 0:00:00 5/31/1998 0:00:00
## 973 970 933
## 5/22/2011 0:00:00 6/4/2008 0:00:00 5/21/2004 0:00:00
## 919 900 899
## 5/26/2011 0:00:00 3/12/2006 0:00:00 6/21/2011 0:00:00
## 885 883 876
## 5/6/2003 0:00:00 4/3/2007 0:00:00 5/24/2004 0:00:00
## 830 801 798
## 6/10/2008 0:00:00 5/25/2008 0:00:00 5/18/1995 0:00:00
## 782 779 766
## 6/8/2008 0:00:00 10/26/2010 0:00:00 6/15/2009 0:00:00
## 766 761 754
## 2/11/2009 0:00:00 10/24/2001 0:00:00 4/26/2011 0:00:00
## 739 738 736
## 6/1/2011 0:00:00 5/10/2003 0:00:00 6/22/2006 0:00:00
## 729 728 726
## 1/29/2008 0:00:00 5/24/2011 0:00:00 6/6/2005 0:00:00
## 721 720 718
## 6/2/2004 0:00:00 4/10/2009 0:00:00 7/22/2008 0:00:00
## 708 704 698
## 6/18/1998 0:00:00 6/18/2011 0:00:00 6/5/2008 0:00:00
## 695 693 689
## 5/23/2011 0:00:00 5/25/2006 0:00:00 7/21/2003 0:00:00
## 681 680 680
## 8/1/2011 0:00:00 4/28/2002 0:00:00 4/18/2002 0:00:00
## 670 669 665
## 11/10/2002 0:00:00 6/20/2011 0:00:00 8/2/2008 0:00:00
## 657 657 654
## 6/16/2008 0:00:00 4/22/2005 0:00:00 6/4/2002 0:00:00
## 649 646 639
## 6/11/2003 0:00:00 6/15/2008 0:00:00 7/11/2011 0:00:00
## 637 637 637
## 4/25/2011 0:00:00 5/24/2010 0:00:00 6/18/2009 0:00:00
## 636 631 628
## 5/17/1999 0:00:00 4/15/2011 0:00:00 6/22/2008 0:00:00
## 623 622 620
## 5/4/2003 0:00:00 6/29/1998 0:00:00 7/4/2003 0:00:00
## 615 614 611
## 6/8/2011 0:00:00 6/1/2008 0:00:00 2/5/2008 0:00:00
## 609 608 602
## 7/20/2000 0:00:00 6/12/2004 0:00:00 6/17/2009 0:00:00
## 601 596 596
## 4/5/2011 0:00:00 5/15/1998 0:00:00 6/19/2007 0:00:00
## 595 592 592
## 5/18/2000 0:00:00 4/9/2011 0:00:00 6/26/2008 0:00:00
## 589 588 588
## 8/5/2010 0:00:00 6/14/2001 0:00:00 3/9/2006 0:00:00
## 588 584 583
## 6/4/2005 0:00:00 5/11/2011 0:00:00 5/2/2003 0:00:00
## 583 580 580
## 7/9/2003 0:00:00 6/12/2001 0:00:00 6/18/2010 0:00:00
## 579 578 577
## 6/26/2009 0:00:00 11/11/1995 0:00:00 6/15/2010 0:00:00
## 573 572 567
## 5/27/2001 0:00:00 7/5/2004 0:00:00 6/21/2006 0:00:00
## 565 564 561
## 6/5/2005 0:00:00 4/8/1998 0:00:00 8/18/2011 0:00:00
## 561 559 556
## 8/28/2011 0:00:00 7/1/2011 0:00:00 5/8/2009 0:00:00
## 556 552 551
## 4/26/1994 0:00:00 7/13/2004 0:00:00 6/26/2011 0:00:00
## 550 550 549
## (Other)
## 833617
summary(data$PROPDMG)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 12.06 0.50 5000.00
summary(data$PROPDMGEXP)
## - ? + 0 1 2 3 4 5
## 465934 1 8 5 216 25 13 4 4 28
## 6 7 8 B h H K m M
## 4 5 1 40 1 6 424665 7 11330
summary(data$CROPDMG)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 0.000 1.527 0.000 990.000
summary(data$CROPDMGEXP)
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
The data is then reformatted. First only the year information is extracted. Second, only data from 1996 onward is used. Third the abbreviation for unit representing the dollar amount of loss is converted into number. If the abbreviation is not one of the recognized ones, it will be set to 0.
year <- data$BGN_DATE
year <- gsub(" 0:00:00", "", year)
year <- gsub("[0-9]+/[0-9]+/", "", year)
year <- as.numeric(year)
data$year <- year
recent <- data[data$year >= 1996,]
recent$PROPDMGEXP <- gsub("[^BbMmKkHh]", "0", recent$PROPDMGEXP )
recent$PROPDMGEXP <- gsub("[Bb]", "1000000000", recent$PROPDMGEXP )
recent$PROPDMGEXP <- gsub("[Mm]", "1000000", recent$PROPDMGEXP )
recent$PROPDMGEXP <- gsub("[Kk]", "1000", recent$PROPDMGEXP )
recent$PROPDMGEXP <- gsub("[Hh]", "100", recent$PROPDMGEXP )
recent$CROPDMGEXP <- gsub("[^BbMmKkHh]", "0", recent$CROPDMGEXP )
recent$CROPDMGEXP <- gsub("[Bb]", "1000000000", recent$CROPDMGEXP )
recent$CROPDMGEXP <- gsub("[Mm]", "1000000", recent$CROPDMGEXP )
recent$CROPDMGEXP <- gsub("[Kk]", "1000", recent$CROPDMGEXP )
recent$CROPDMGEXP <- gsub("[Hh]", "100", recent$CROPDMGEXP )
recent$PROPDMGEXP <- as.numeric(recent$PROPDMGEXP)
recent$CROPDMGEXP <- as.numeric(recent$CROPDMGEXP)
recent$PROPDMG <- recent$PROPDMG*recent$PROPDMGEXP
recent$CROPDMG <- recent$CROPDMG*recent$CROPDMGEXP
recent[is.na(recent)] <-0
I check the range of data in each of the relevant data fields to look for anomaly.
summary(recent)
## STATE__ BGN_DATE BGN_TIME
## Min. : 1.00 5/25/2011 0:00:00: 1202 12:00:00 AM: 10163
## 1st Qu.:19.00 4/27/2011 0:00:00: 1193 06:00:00 PM: 7350
## Median :30.00 6/9/2011 0:00:00 : 1030 04:00:00 PM: 7261
## Mean :31.36 5/30/2004 0:00:00: 1016 05:00:00 PM: 6891
## 3rd Qu.:45.00 4/4/2011 0:00:00 : 1009 12:00:00 PM: 6703
## Max. :95.00 4/2/2006 0:00:00 : 981 03:00:00 PM: 6700
## (Other) :647099 (Other) :608462
## TIME_ZONE COUNTY COUNTYNAME STATE
## CST :334329 Min. : 1.00 WASHINGTON: 5311 TX : 51335
## EST :224876 1st Qu.: 29.00 JEFFERSON : 5112 KS : 38649
## MST : 63241 Median : 73.00 FRANKLIN : 4374 OK : 26980
## PST : 20821 Mean : 99.89 JACKSON : 4352 MO : 25802
## AST : 5682 3rd Qu.:129.00 LINCOLN : 4086 IA : 22712
## HST : 2491 Max. :873.00 MADISON : 3929 IL : 21215
## (Other): 2090 (Other) :626366 (Other):466837
## EVTYPE BGN_RANGE BGN_AZI
## HAIL :207715 Min. : 0.000 :313456
## TSTM WIND :128662 1st Qu.: 0.000 N : 84182
## THUNDERSTORM WIND: 81402 Median : 0.000 W : 36145
## FLASH FLOOD : 50999 Mean : 1.911 S : 35052
## FLOOD : 24247 3rd Qu.: 2.000 E : 31224
## TORNADO : 23154 Max. :520.000 NW : 22817
## (Other) :137351 (Other):130654
## BGN_LOCATI END_DATE END_TIME
## : 90157 4/27/2011 0:00:00: 1214 06:00:00 PM: 9802
## COUNTYWIDE : 19680 5/25/2011 0:00:00: 1196 05:00:00 PM: 8314
## SPRINGFIELD : 843 6/9/2011 0:00:00 : 1021 04:00:00 PM: 8104
## SOUTH PORTION: 810 4/4/2011 0:00:00 : 1007 12:00:00 PM: 7483
## NORTH PORTION: 784 5/30/2004 0:00:00: 998 11:59:00 PM: 7184
## COLUMBIA : 757 4/7/2006 0:00:00 : 993 03:00:00 PM: 7128
## (Other) :540499 (Other) :647101 (Other) :605515
## COUNTY_END COUNTYENDN END_RANGE END_AZI
## Min. :0 Min. :0 Min. : 0.000 :477051
## 1st Qu.:0 1st Qu.:0 1st Qu.: 0.000 N : 27957
## Median :0 Median :0 Median : 0.000 S : 22393
## Mean :0 Mean :0 Mean : 1.341 W : 20015
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.: 1.000 E : 19921
## Max. :0 Max. :0 Max. :176.000 NE : 14498
## (Other): 71695
## END_LOCATI LENGTH WIDTH
## :253369 Min. : 0.0000 Min. : 0.000
## COUNTYWIDE : 19731 1st Qu.: 0.0000 1st Qu.: 0.000
## SOUTH PORTION : 833 Median : 0.0000 Median : 0.000
## NORTH PORTION : 780 Mean : 0.1014 Mean : 4.633
## CENTRAL PORTION: 617 3rd Qu.: 0.0000 3rd Qu.: 0.000
## SPRINGFIELD : 575 Max. :400.0000 Max. :4400.000
## (Other) :377625
## F MAG FATALITIES
## Min. :0.00000 Min. : 0.0 Min. : 0.00000
## 1st Qu.:0.00000 1st Qu.: 0.0 1st Qu.: 0.00000
## Median :0.00000 Median : 50.0 Median : 0.00000
## Mean :0.02058 Mean : 45.3 Mean : 0.01336
## 3rd Qu.:0.00000 3rd Qu.: 75.0 3rd Qu.: 0.00000
## Max. :5.00000 Max. :22000.0 Max. :158.00000
##
## INJURIES PROPDMG PROPDMGEXP
## Min. :0.00e+00 Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:0.00e+00 1st Qu.:0.000e+00 1st Qu.:0.000e+00
## Median :0.00e+00 Median :0.000e+00 Median :1.000e+03
## Mean :8.87e-02 Mean :5.612e+05 Mean :6.081e+04
## 3rd Qu.:0.00e+00 3rd Qu.:1.250e+03 3rd Qu.:1.000e+03
## Max. :1.15e+03 Max. :1.150e+11 Max. :1.000e+09
##
## CROPDMG CROPDMGEXP WFO
## Min. :0.000e+00 Min. :0.000e+00 OUN : 17393
## 1st Qu.:0.000e+00 1st Qu.:0.000e+00 LWX : 13174
## Median :0.000e+00 Median :0.000e+00 PHI : 12551
## Mean :5.318e+04 Mean :9.257e+03 TSA : 12483
## 3rd Qu.:0.000e+00 3rd Qu.:1.000e+03 JAN : 12446
## Max. :1.510e+09 Max. :1.000e+09 FWD : 12193
## (Other):573290
## STATEOFFIC
## TEXAS, North : 12193
## ARKANSAS, Central and North Central: 11738
## IOWA, Central : 11345
## KANSAS, Southwest : 11212
## GEORGIA, North and Central : 11120
## KANSAS, Southeast : 10960
## (Other) :584962
## ZONENAMES
## :350127
## :205988
## GREATER RENO / CARSON CITY / M - GREATER RENO / CARSON CITY / M : 624
## GREATER LAKE TAHOE AREA - GREATER LAKE TAHOE AREA : 591
## MADISON - MADISON : 298
## MONO - MONO : 294
## (Other) : 95608
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_
## Min. : 0 Min. :-14451 Min. : 0 Min. :-14455
## 1st Qu.:2804 1st Qu.: 7237 1st Qu.: 0 1st Qu.: 0
## Median :3557 Median : 8634 Median :2923 Median : 7412
## Mean :2886 Mean : 6923 Mean :1937 Mean : 4681
## 3rd Qu.:4024 3rd Qu.: 9548 3rd Qu.:3803 3rd Qu.: 9209
## Max. :9706 Max. : 17124 Max. :9706 Max. :106220
##
## REMARKS REFNUM
## : 99842 Min. :248768
## Trees down.\n : 1110 1st Qu.:412150
## Several trees were blown down.\n : 568 Median :575533
## Trees were downed.\n : 446 Mean :575533
## Large trees and power lines were blown down.\n: 432 3rd Qu.:738915
## A few trees were blown down.\n : 398 Max. :902297
## (Other) :550734
## year
## Min. :1996
## 1st Qu.:2000
## Median :2005
## Mean :2004
## 3rd Qu.:2008
## Max. :2011
##
The summary on “recent” data frame shows an unusually large property damage at $110 billion. Further investigation is needed.
recent[which(recent$PROPDMG == max(recent$PROPDMG)),]
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME
## 605953 6 1/1/2006 0:00:00 12:00:00 AM PST 55 NAPA
## STATE EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## 605953 CA FLOOD 0 COUNTYWIDE 1/1/2006 0:00:00
## END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI END_LOCATI
## 605953 07:00:00 AM 0 0 0 COUNTYWIDE
## LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 605953 0 0 0 0 0 0 1.15e+11 1e+09 32500000
## CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 605953 1e+06 MTR CALIFORNIA, Western 3828 12218
## LATITUDE_E LONGITUDE_
## 605953 3828 12218
## REMARKS
## 605953 Major flooding continued into the early hours of January 1st, before the Napa River finally fell below flood stage and the water receeded. Flooding was severe in Downtown Napa from the Napa Creek and the City and Parks Department was hit with $6 million in damage alone. The City of Napa had 600 homes with moderate damage, 150 damaged businesses with costs of at least $70 million.
## REFNUM year
## 605953 605943 2006
The search finds the record with original row number 605953. Its PRORPDMGEXP field is B, denoting billion. Next, we look at the original record.
data[which(row.names(data) == 605953),]
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME
## 605953 6 1/1/2006 0:00:00 12:00:00 AM PST 55 NAPA
## STATE EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## 605953 CA FLOOD 0 COUNTYWIDE 1/1/2006 0:00:00
## END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI END_LOCATI
## 605953 07:00:00 AM 0 NA 0 COUNTYWIDE
## LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 605953 0 0 NA 0 0 0 115 B 32.5
## CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 605953 M MTR CALIFORNIA, Western 3828 12218
## LATITUDE_E LONGITUDE_
## 605953 3828 12218
## REMARKS
## 605953 Major flooding continued into the early hours of January 1st, before the Napa River finally fell below flood stage and the water receeded. Flooding was severe in Downtown Napa from the Napa Creek and the City and Parks Department was hit with $6 million in damage alone. The City of Napa had 600 homes with moderate damage, 150 damaged businesses with costs of at least $70 million.
## REFNUM year
## 605953 605943 2006
The result shows that in the original entry, PROPDMG is 115. Using the remarks column, I determine that the correct PROPDMGEXP notation is likely “M.” Corrections are made as follows:
recent[which(row.names(recent) == 605953),26] <-1e+06
recent[which(row.names(recent) == 605953),25] <-115*1e+06
The injury and fatality count and the property and crop damage are then aggregated by event types.
injury <- aggregate(recent$INJURIES, by =list(recent$EVTYPE), FUN=sum)
fatal <- aggregate(recent$FATALITIES, by =list(recent$EVTYPE), FUN=sum)
propdmg <- aggregate(recent$PROPDMG, by =list(recent$EVTYPE), FUN=sum)
cropdmg<- aggregate(recent$CROPDMG, by =list(recent$EVTYPE), FUN=sum)
# rename labels
names(injury) <- c("Event_Type", "Injuries")
names(fatal) <- c("Event_Type", "Fatalities")
names(propdmg) <- c("Event_Type", "Propdamages")
names(cropdmg) <- c("Event_Type", "Cropdamages")
# merge columns, change all words to lower case
# then take out events that have no casualty, then sort the rest
total <- merge(injury, fatal ,by="Event_Type")
total$Event_Type <- tolower(total$Event_Type)
cas <- total[which(total$Injuries != 0 | total$Fatalities != 0),]
cas_sorted <- cas[order(cas$Fatalities, cas$Injuries),]
# merge columns, change all words to lower case
# then take out events that have no damages, then sort the rest
total_dmg <- merge(propdmg, cropdmg ,by="Event_Type")
total_dmg$Event_Type <- tolower(total_dmg$Event_Type)
dmg <- total_dmg[which(total_dmg$Propdamages != 0.00 | total_dmg$Cropdamages != 0.00),]
dmg_sorted <- dmg[order(dmg$Propdamages, dmg$Cropdamages),]
# create an empty data.frame for consolidated event types
options(stringsAsFactors= FALSE)
cas_con <- data.frame(Event_Type = character(), Injuries = numeric(), Fatalities = numeric())
dmg_con <- data.frame(Event_Type = character(), Propdamages = numeric(), Cropdamages = numeric())
The event types are consolidated based on similarities. Regular expression is used to extract event types which are similar in my opinion. For quesiton 1:
i <- 1 #counter
heat <-grep("heat|warm|hyperthermia", cas_sorted$Event_Type)
heat_rows <- cas_sorted[heat,]
cas_con[i,]<- c("heat", sum(heat_rows$Injuries),sum(heat_rows$Fatalities))
cas_sorted <- cas_sorted[-heat,]
i <- i+1
tornado <-grep("tornado|waterspout|funnel", cas_sorted$Event_Type)
tornado_rows <- cas_sorted[tornado,]
cas_con[i,]<- c("tornado", sum(tornado_rows$Injuries),sum(tornado_rows$Fatalities))
cas_sorted <- cas_sorted[-tornado,]
i <- i+1
flood <-grep("flood|stream fld", cas_sorted$Event_Type)
flood_rows <- cas_sorted[flood,]
cas_con[i,]<- c("flood", sum(flood_rows$Injuries),sum(flood_rows$Fatalities))
cas_sorted <- cas_sorted[-flood,]
i <- i+1
lightning <-grep("lightning", cas_sorted$Event_Type)
lightning_rows <- cas_sorted[lightning,]
cas_con[i,]<- c("lightning", sum(lightning_rows$Injuries),sum(lightning_rows$Fatalities))
cas_sorted <- cas_sorted[-lightning,]
i <- i+1
current <-grep("current|surge|tide|surf|sea|wave|water|marine|drowning|swells", cas_sorted$Event_Type)
current_rows <- cas_sorted[current,]
cas_con[i,]<- c("current", sum(current_rows$Injuries),sum(current_rows$Fatalities))
cas_sorted <- cas_sorted[-current,]
i <- i+1
wind <-grep("wind", cas_sorted$Event_Type)
notwind <- grep("chill", cas_sorted$Event_Type)
wind <- setdiff(wind, notwind)
wind_rows <- cas_sorted[wind,]
cas_con[i,]<- c("wind", sum(wind_rows$Injuries),sum(wind_rows$Fatalities))
cas_sorted <- cas_sorted[-wind,]
i <- i+1
icy <-grep("snow|ice|blizzard|winter|wintry|icy|frost|glaze|freezing", cas_sorted$Event_Type)
icy_rows <- cas_sorted[icy,]
cas_con[i,]<- c("snow_ice_winter", sum(icy_rows$Injuries),sum(icy_rows$Fatalities))
cas_sorted <- cas_sorted[-icy,]
i <- i+1
cold <-grep("cold|chill|hypothermia", cas_sorted$Event_Type)
cold_rows <- cas_sorted[cold,]
cas_con[i,]<- c("cold", sum(cold_rows$Injuries),sum(cold_rows$Fatalities))
cas_sorted <- cas_sorted[-cold,]
i <- i+1
hurr <-grep("hurricane|tropical|typhoon", cas_sorted$Event_Type)
hurr_rows <- cas_sorted[hurr,]
cas_con[i,]<- c("hurricane", sum(hurr_rows$Injuries),sum(hurr_rows$Fatalities))
cas_sorted <- cas_sorted[-hurr,]
i <- i+1
fire <-grep("fire", cas_sorted$Event_Type)
fire_rows <- cas_sorted[fire,]
cas_con[i,]<- c("fire", sum(fire_rows$Injuries),sum(fire_rows$Fatalities))
cas_sorted <- cas_sorted[-fire,]
i <- i+1
fog <-grep("fog", cas_sorted$Event_Type)
fog_rows <- cas_sorted[fog,]
cas_con[i,]<- c("fog", sum(fog_rows$Injuries),sum(fog_rows$Fatalities))
cas_sorted <- cas_sorted[-fog,]
i <- i+1
slide <-grep("slide", cas_sorted$Event_Type)
slide_rows <- cas_sorted[slide,]
cas_con[i,]<- c("slide", sum(slide_rows$Injuries),sum(slide_rows$Fatalities))
cas_sorted <- cas_sorted[-slide,]
i <- i+1
rain <-grep("rain|recip|drizzle", cas_sorted$Event_Type)
rain_rows <- cas_sorted[rain,]
cas_con[i,]<- c("rain", sum(rain_rows$Injuries),sum(rain_rows$Fatalities))
cas_sorted <- cas_sorted[-rain,]
i <- i+1
hail <-grep("hail", cas_sorted$Event_Type)
hail_rows <- cas_sorted[hail,]
cas_con[i,]<- c("hail", sum(hail_rows$Injuries),sum(hail_rows$Fatalities))
cas_sorted <- cas_sorted[-hail,]
i <- i+1
storm <-grep("storm|dust|microburst", cas_sorted$Event_Type)
storm_rows <- cas_sorted[storm,]
cas_con[i,]<- c("storm", sum(storm_rows$Injuries),sum(storm_rows$Fatalities))
cas_sorted <- cas_sorted[-storm,]
For question 2, the same consolidation is done.
j <- 1 #counter
wind <-grep("wind", dmg_sorted$Event_Type)
notwind <- grep("chill", dmg_sorted$Event_Type)
wind <- setdiff(wind, notwind)
wind_rows <- dmg_sorted[wind,]
dmg_con[j,]<- c("wind", sum(wind_rows$Propdamages),sum(wind_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-wind,]
j <- j+1
flood <-grep("flood|stream fld", dmg_sorted$Event_Type)
flood_rows <- dmg_sorted[flood,]
dmg_con[j,]<- c("flood", sum(flood_rows$Propdamages),sum(flood_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-flood,]
j <- j+1
tornado <-grep("tornado|waterspout|funnel|landspout", dmg_sorted$Event_Type)
tornado_rows <- dmg_sorted[tornado,]
dmg_con[j,]<- c("tornado", sum(tornado_rows$Propdamages),sum(tornado_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-tornado,]
j <- j+1
hail <-grep("hail", dmg_sorted$Event_Type)
hail_rows <- dmg_sorted[hail,]
dmg_con[j,]<- c("hail", sum(hail_rows$Propdamages),sum(hail_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-hail,]
j <- j+1
lightning <-grep("lightning", dmg_sorted$Event_Type)
lightning_rows <- dmg_sorted[lightning,]
dmg_con[j,]<- c("lightning", sum(lightning_rows$Propdamages),sum(lightning_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-lightning,]
j <- j+1
icy <-grep("snow|ice|blizzard|winter|wintry|icy|frost|glaze|freezing", dmg_sorted$Event_Type)
icy_rows <- dmg_sorted[icy,]
dmg_con[j,]<- c("snow_ice_winter", sum(icy_rows$Propdamages),sum(icy_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-icy,]
j <- j+1
fire <-grep("fire", dmg_sorted$Event_Type)
fire_rows <- dmg_sorted[fire,]
dmg_con[j,]<- c("fire", sum(fire_rows$Propdamages),sum(fire_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-fire,]
j <- j+1
hurr <-grep("hurricane|tropical|typhoon", dmg_sorted$Event_Type)
hurr_rows <- dmg_sorted[hurr,]
dmg_con[j,]<- c("hurricane", sum(hurr_rows$Propdamages),sum(hurr_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-hurr,]
j <- j+1
rain <-grep("rain|recip|drizzle", dmg_sorted$Event_Type)
rain_rows <- dmg_sorted[rain,]
dmg_con[j,]<- c("rain", sum(rain_rows$Propdamages),sum(rain_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-rain,]
j <- j+1
storm <-grep("storm|dust|microburst|downburst", dmg_sorted$Event_Type)
storm_rows <- dmg_sorted[storm,]
dmg_con[j,]<- c("storm", sum(storm_rows$Propdamages),sum(storm_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-storm,]
j <- j+1
slide <-grep("slide", dmg_sorted$Event_Type)
slide_rows <- dmg_sorted[slide,]
dmg_con[j,]<- c("slide", sum(slide_rows$Propdamages),sum(slide_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-slide,]
j <- j+1
fog <-grep("fog", dmg_sorted$Event_Type)
fog_rows <- dmg_sorted[fog,]
dmg_con[j,]<- c("fog", sum(fog_rows$Propdamages),sum(fog_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-fog,]
j <- j+1
cold <-grep("cold|chill|hypothermia|freeze", dmg_sorted$Event_Type)
cold_rows <- dmg_sorted[cold,]
dmg_con[j,]<- c("cold", sum(cold_rows$Propdamages),sum(cold_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-cold,]
j <- j+1
current <-grep("current|surge|tide|surf|sea|wave|water|marine|drowning|swells|seiche", dmg_sorted$Event_Type)
current_rows <- dmg_sorted[current,]
dmg_con[j,]<- c("current", sum(current_rows$Propdamages),sum(current_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-current,]
j <- j+1
heat <-grep("heat|warm|hyperthermia", dmg_sorted$Event_Type)
heat_rows <- dmg_sorted[heat,]
dmg_con[j,]<- c("heat", sum(heat_rows$Propdamages),sum(heat_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-heat,]
j <- j+1
other <-grep("other", dmg_sorted$Event_Type)
other_rows <- dmg_sorted[other,]
dmg_con[j,]<- c("other", sum(other_rows$Propdamages),sum(other_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-other,]
j <- j+1
erosion <-grep("erosion", dmg_sorted$Event_Type)
erosion_rows <- dmg_sorted[erosion,]
dmg_con[j,]<- c("erosion", sum(erosion_rows$Propdamages),sum(erosion_rows$Cropdamages))
dmg_sorted <- dmg_sorted[-erosion,]
cas_con <- rbind(cas_con, cas_sorted)
cas_con_fatal <-cas_con[order(-as.integer(cas_con$Fatalities)),]
row.names(cas_con_fatal) <- NULL
print(cas_con_fatal)
## Event_Type Injuries Fatalities
## 1 heat 7702 2037
## 2 tornado 20670 1513
## 3 flood 8520 1337
## 4 current 858 752
## 5 wind 6614 747
## 6 lightning 4141 651
## 7 snow_ice_winter 3585 552
## 8 cold 132 382
## 9 avalanche 156 223
## 10 hurricane 1666 182
## 11 rain 260 96
## 12 fire 1458 87
## 13 fog 855 69
## 14 slide 55 43
## 15 tsunami 129 33
## 16 storm 443 21
## 17 hail 723 7
## 18 drought 4 0
## 19 other 4 0
From the above table, we see that heat caused the most fatalities, followed by tornado and flood. As for injuries, tornado, flood and heat were the top contributors in that order during the study period.
#create a new data.frame for drawing charts
cas_con_chart <-cas_con
cas_con_chart$Injuries <- as.numeric(cas_con$Injuries)
cas_con_chart$Fatalities <- as.numeric(cas_con$Fatalities)
cas_con_chart <-cas_con_chart[order(cas_con_chart$Fatalities),]
#draw a chart to show both injuries and casualties
par(mai=c(2,2,2,2))
barplot(t(as.matrix(cas_con_chart[, c("Injuries", "Fatalities")])), beside = TRUE, names.arg = cas_con_chart$Event_Type, main = "Injuries and Fatalities by Event Type (1996-2011)", xlab="Count", horiz=TRUE, las=1,col=c("red","blue"))
legend("bottomright", c("Injuries", "Fatalities"), fill=c("red","blue"))
#combine events that have been consolidated and the remaining ones, then sort
dmg_con <- rbind(dmg_con, dmg_sorted)
dmg_con$total <- as.numeric(dmg_con$Propdamages)+as.numeric(dmg_con$Cropdamages)
dmg_con_total <-dmg_con[order(-as.numeric(dmg_con$total)),]
row.names(dmg_con_total) <- NULL
#create a new data frame for drawing a barplot
dmg_con_chart <-dmg_con_total
dmg_con_chart$Propdamages <- as.numeric(dmg_con_total$Propdamages)
dmg_con_chart$Cropdamages <- as.numeric(dmg_con_total$Cropdamages)
dmg_con_chart$total <- as.numeric(dmg_con_total$total)
dmg_con_plot <-dmg_con_chart[,1:3]
#draw a chart to show both injuries and casualties
par(mai=c(2,2,2,2))
barplot(t(as.matrix(dmg_con_plot[, c("Propdamages", "Cropdamages")])), xaxt="n", names.arg = dmg_con_plot$Event_Type, main = "Property and Crop Damage by Event Type (1996-2011)", xlab="Damage ($B)", horiz=TRUE, las=1, col=c("red","blue"))
axis(side=1, at=c(0,5e10, 1e11, 1.5e11), labels=c(0, 50, 100, 150))
legend("topright", c("Property Damage", "Crop Damage"),fill=c("red","blue"))
From the above chart, we can see that for economic damage, hurricane caused the most total and property damage but drought caused the most crop damage during the study period.