Course 5 - Assignment 2: Damage of weather events in the United States

Synopsis

  1. The data is downloaded from the NOAA storm database, which included data of the United States in 1950 to Nov 2011.
  2. The data is editted to make a standardized event table based on the table given in the data document (ref [2]). In the beginning, there are 985 levles of event types from the data. I manually clean up the data, and the level of the final catalogue is reduced to 55.
  3. The damage to human and ecomony are summed up for each cleaned event types.
  4. The results are examined by makeing the plots, the human damage v.s. event type (plot1) and the economy damage v.s. event type (plot2).

Data Processing

# data loading
if(file.exists('rawdata.csv.bz2')){
    url = 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2'
    download.file(url,destfile='rawdata.csv.bz2',method='curl')
}
data<-read.csv(bzfile('rawdata.csv.bz2'))

# a quick summary of the data
evtype <- levels(data$EVTYPE)
# cleanning data
tempstr = data$EVTYPE
newlevel<-c('-AVALANCHE','-ASTRONOMICAL HIGH TIDE','-ASTRONOMICAL LOW TIDE','-FROST/FREEZE','-RIP CURRENT','-HEAVY SNW','-FLD','-FLASH-FLD','-STORM SURGE','-DUST STORM','-DROUGHT','-MICROBURST','-DENSE SMOKE','-DENSE FOG','-FREEZING FOG','-ICE-JAM','-FRZ FOG','-SLEET','-DEBRIS FLOW','-DAM BREAK','-COLD/WIND CHILL','-EXT COLD','-COASTAL-FLD','-LAKESHORE-FLD','-LAKE-EFFECT SNW','-OTHER','-BLIZZARD','-HAIL','-ICE STORM','-FUNNEL CLOUD','-DUST DEVIL','-WATERSPOUT','-TROPICAL STORM','-TROPICAL DEPRESSION','-WILDFIRE','-LIGHTNING','-MARINE-StrW','-MARINE-HW','-MARINE-HAL','-MARINE-OTHER','-THUNDERSTORM WIND','-MARINE-THUSTRM','-HURRICANE','-VOLCANIC ASH','-TORNADO','-WINTRY MIX','-WINTER STORM','-TSUNAMI','-HIGH SURF','-SEICHE','-VOG','-WET','-EXT HEAT','-HEAT','-HEAVY RAIN','-HEAVY WIND')

tempstr <- factor(tempstr, levels = c(levels(tempstr), newlevel))
# this is going to be messy, and I will do it manually. and start from the event sounds seriously and unique, e.g. Tonado, "Volcanic", ...
ind <- grep("(TORNDAO|TORNADO|Whirlwind|ROTATING WALL CLOUD|WALL CLOUD)",data$EVTYPE,ignore.case=TRUE)
tempstr[ind]='-TORNADO'
ind <- grep("(VOLCANIC)",tempstr,ignore.case=TRUE)
tempstr[ind]='-VOLCANIC ASH'
ind <- grep("(HURRICANE|TYPHOON)",tempstr,ignore.case=TRUE)
tempstr[ind]='-HURRICANE'
ind <- grep("(MARINE TSTM)|(MARINE THU)",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-MARINE-THUSTRM'
ind <- grep("(TSTM|THUNDERSTORM|TUNDERSTORM|THUNDEERSTORM|THUDERSTORM|THUNDESTROM|THUNDERTORM|THUNDERESTORM|THUNDERSTROM|THUNDERTSORM|THUNERSTORM|THUNDESTORM|Downburst|gustnadoes)",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-THUNDERSTORM WIND'
ind <- grep("MARINE Accident|MARINE MISHAP",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-MARINE-OTHER'
ind <- grep("MARINE HAIL",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-MARINE-HAL'
ind <- grep("MARINE HIGH",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-MARINE-HW'
ind <- grep("MARINE STRONG",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-MARINE-StrW'
ind <- grep("lightning|lighting|ligntning",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-LIGHTNING'
ind <- grep("fire",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-WILDFIRE'
ind <- grep("tropical storm",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-TROPICAL STORM'
ind <- grep("tropical depression",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-TROPICAL DEPRESSION'
ind <- grep("water s|waters|WAYTERSPOUT",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-WATERSPOUT'
ind <- grep("dust devil|dust devel|landspout|duststorm",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-DUST DEVIL'
ind <- grep("avalance|avalanche",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-AVALANCHE'
ind <- grep("fun",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-FUNNEL CLOUD'
ind <- grep("glaze/ice storm",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-ICE STORM'
ind <- grep("sleet|pellets|black ice|glaze",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-SLEET'
ind <- grep("hail",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-HAIL'
ind <- grep('bli',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-BLIZZARD'
ind <- grep('summary|other|\\?|none',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-OTHER'
ind <- grep('lake effect|lake snow|lake-effect',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-LAKE-EFFECT SNW'
ind <- grep('lake flood|lakeshore flood|lakeshre flood',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-LAKESHORE-FLD'
ind <- grep('coast|Cstl Flood|beach flood|BEACH',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-COASTAL-FLD'
ind <- which( (grepl('(chill|cold)',tempstr,ignore.case=TRUE) &
              grepl('wind',tempstr,ignore.case=TRUE)) &
              !(grepl('extr|prolon|record|UNSEASONAB|UNUSUAL|severe',tempstr,ignore.case=TRUE)))
tempstr[ind] <- '-COLD/WIND CHILL'
ind <- grep('dam ',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-DAM BREAK'
ind <- grep('mud|slide|landslump',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-DEBRIS FLOW'
ind <- grep("road",tempstr,ignore.case=TRUE)
tempstr[ind] <- '-SLEET'
ind <- grep('freezing fog|ice fog|freezing drizzle|freezing spray',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-FRZ FOG'
ind <- grep('ice patchy|patchy ice|ice jam|ice floes',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-ICE-JAM'
ind <- intersect(grep('(ice)',tempstr,ignore.case=TRUE),
                 grep('snow|storm',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-ICE STORM'
ind <- intersect(grep('(ice)',tempstr,ignore.case=TRUE),
                 grep('winds',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-ICE STORM'
ind <- which(tempstr == 'ICE')
tempstr[ind] <- "-ICE STORM"
ind <- grep('freezing fog|fog and cold',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-FREEZING FOG'
ind <- grep('dense fog',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-DENSE FOG'
ind <- which(tempstr == 'FOG')
tempstr[ind] <- '-DENSE FOG'
ind <- grep('smoke',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-DENSE SMOKE'
ind <- grep('microburst|mircoburst|micoburst',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-MICROBURST'
ind <- grep('dro|dry|driest',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-DROUGHT'
ind <- grep('dust storm|blowing dust|dust storm|saharan',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-DUST STORM'
ind <- grep(' sea|wave|swell|surge|blow-out tide|high wind and high tide|high tides|tidal',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-STORM SURGE'
ind <- grep('flash',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-FLASH-FLD'
ind <- grep('flood| fld',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-FLD'
ind <- grep('heavy snow',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-HEAVY SNW'
ind <- grep('rip|current',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-RIP CURRENT'
ind <- grep('frost|freeze',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-FROST/FREEZE'
ind <- grep('winter mix|winter weather|wintery mix|wintry',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-WINTRY MIX'
ind <- grep('winter',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-WINTER STORM'
ind <- grep('surf',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-HIGH SURF'
ind <- grep('tsunami',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-TSUNAMI'
ind <- grep('lack of snow',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-OTHER'
ind <- intersect(grep('RECORD|heavy|excessive',tempstr,ignore.case=TRUE),grep('snow',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-HEAVY SNW'
ind <- grep('snow',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-WINTER STORM'
ind <- which( (grepl('cold',tempstr,ignore.case=TRUE)&grepl('excessive|ext|record|prolong|severe|unseas|unusually',tempstr,ignore.case=TRUE)) & ! grepl('-',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-EXT COLD'
ind <- which( (grepl('chill',tempstr,ignore.case=TRUE)&grepl('extreme',tempstr,ignore.case=TRUE)) & ! grepl('-',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-EXT COLD'

ind <- which( (grepl('cold',tempstr,ignore.case=TRUE))&!(grepl('-',tempstr,ignore.case=TRUE)))
tempstr[ind] <- '-COLD/WIND CHILL'
ind <- grep('SEICHE',tempstr,ignore.case=TRUE)
tempstr[ind] <- '-SEICHE'
ind <- which(grepl('wet',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-WET'
ind <- which(grepl('vog',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-VOG'
ind <- which( (grepl('warm|heat|hot|temperature',tempstr,ignore.case=TRUE) & 
               grepl('abnormal|very|unusual|spell|exc|ext|rec|unseason|burst',tempstr,ignore.case=TRUE))          
               &!(grepl('-|low',tempstr,ignore.case=TRUE)))
tempstr[ind] <- '-EXT HEAT'
ind <- which( (grepl('warm|heat|hot|temperature',tempstr,ignore.case=TRUE) )          
               &!(grepl('-|low',tempstr,ignore.case=TRUE)))
tempstr[ind] <- '-HEAT'
ind <- which( (grepl('cold|temp|cool spell',tempstr,ignore.case=TRUE) )          
               &!(grepl('-',tempstr,ignore.case=TRUE)))
tempstr[ind] <- '-EXT COLD'
ind <- which(grepl('thermia',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-EXT HEAT'
ind <- which(grepl('ASTRONOMICAL HIGH TIDE',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-ASTRONOMICAL HIGH TIDE'
ind <- which(grepl('ASTRONOMICAL LOW TIDE',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-ASTRONOMICAL LOW TIDE'
ind <- which( (grepl('rain|water|perci|stream|shower|precip',tempstr,ignore.case=TRUE) )          
               &!(grepl('-',tempstr,ignore.case=TRUE)))
tempstr[ind] <- '-HEAVY RAIN'
ind <- which( (grepl('wnd|wind|storm',tempstr,ignore.case=TRUE) )          
               &!(grepl('-',tempstr,ignore.case=TRUE)))
tempstr[ind] <- '-HEAVY WIND'
ind <- which(grepl('NON-SEVERE WIND',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-OTHER'
ind <- which(!grepl('-',tempstr,ignore.case=TRUE))
tempstr[ind] <- '-OTHER'


#store the standardized EVTYPE to data$altEVTYPE, and print out it's levels. 
data$altEVTYPE <- factor(tempstr)
levels(data$altEVTYPE)
##  [1] "-AVALANCHE"              "-ASTRONOMICAL HIGH TIDE"
##  [3] "-ASTRONOMICAL LOW TIDE"  "-FROST/FREEZE"          
##  [5] "-RIP CURRENT"            "-HEAVY SNW"             
##  [7] "-FLD"                    "-FLASH-FLD"             
##  [9] "-STORM SURGE"            "-DUST STORM"            
## [11] "-DROUGHT"                "-MICROBURST"            
## [13] "-DENSE SMOKE"            "-DENSE FOG"             
## [15] "-FREEZING FOG"           "-ICE-JAM"               
## [17] "-FRZ FOG"                "-SLEET"                 
## [19] "-DEBRIS FLOW"            "-DAM BREAK"             
## [21] "-COLD/WIND CHILL"        "-EXT COLD"              
## [23] "-COASTAL-FLD"            "-LAKESHORE-FLD"         
## [25] "-LAKE-EFFECT SNW"        "-OTHER"                 
## [27] "-BLIZZARD"               "-HAIL"                  
## [29] "-ICE STORM"              "-FUNNEL CLOUD"          
## [31] "-DUST DEVIL"             "-WATERSPOUT"            
## [33] "-TROPICAL STORM"         "-TROPICAL DEPRESSION"   
## [35] "-WILDFIRE"               "-LIGHTNING"             
## [37] "-MARINE-StrW"            "-MARINE-HW"             
## [39] "-MARINE-HAL"             "-THUNDERSTORM WIND"     
## [41] "-MARINE-THUSTRM"         "-HURRICANE"             
## [43] "-VOLCANIC ASH"           "-TORNADO"               
## [45] "-WINTRY MIX"             "-WINTER STORM"          
## [47] "-TSUNAMI"                "-HIGH SURF"             
## [49] "-SEICHE"                 "-VOG"                   
## [51] "-WET"                    "-EXT HEAT"              
## [53] "-HEAT"                   "-HEAVY RAIN"            
## [55] "-HEAVY WIND"
damage.human <- aggregate(cbind(FATALITIES,INJURIES)~altEVTYPE,data,FUN=sum)
damage.human$SUM <- damage.human$FATALITIES+damage.human$INJURIES
names(damage.human) = c('EVTYPE','Fatalities','Injuries','Total')
indHuman = sort(damage.human$Total,decreasing=TRUE,index.return=TRUE)
sdamage.human=damage.human[indHuman$ix,]
sdamage.human$index = 1:55
head(sdamage.human,10)
##                EVTYPE Fatalities Injuries Total index
## 44           -TORNADO       5662    91407 97069     1
## 40 -THUNDERSTORM WIND        711     9510 10221     2
## 52          -EXT HEAT       2030     6747  8777     3
## 7                -FLD        512     6873  7385     4
## 36         -LIGHTNING        817     5231  6048     5
## 53              -HEAT        937     2102  3039     6
## 8          -FLASH-FLD       1035     1800  2835     7
## 55        -HEAVY WIND        428     1840  2268     8
## 29         -ICE STORM        100     2141  2241     9
## 46      -WINTER STORM        252     1480  1732    10
#unify the unit. 
index=c('h','k','m','B')
factor=c(1/10000.,1/1000.,1.,1000.)
data$NormPROPDMG = data$PROPDMG * 0.0
data$NormCROPDMG = data$CROPDMG * 0.0
for (i in 1:4){
    ind = which(grepl(index[i],data$PROPDMGEXP,ignore.case=TRUE))
    data$NormPROPDMG[ind] = data$PROPDMG[ind]*factor[i]
    ind = which(grepl(index[i],data$CROPDMGEXP,ignore.case=TRUE))
    data$NormCROPDMG[ind] = data$CROPDMG[ind]*factor[i]
}
#summing up
damage.property<- aggregate(cbind(NormPROPDMG,NormCROPDMG)~altEVTYPE,data,FUN=sum)
damage.property$SUM<-damage.property$NormPROPDMG+damage.property$NormCROPDMG
names(damage.property) = c('EVTYPE','Property','Crop','Total')
indProp = sort(damage.property$Total,decreasing=TRUE,index.return=TRUE)
sdamage.property=damage.property[indProp$ix,]
sdamage.property$index = 1:55
head(sdamage.property,10)
##                EVTYPE Property      Crop  Total index
## 7                -FLD   150218 10851.314 161069     1
## 42         -HURRICANE    85356  5516.118  90873     2
## 44           -TORNADO    58593   417.461  59011     3
## 9        -STORM SURGE    47976     6.405  47983     4
## 28              -HAIL    15975  3046.888  19021     5
## 8          -FLASH-FLD    16907  1532.197  18439     6
## 11           -DROUGHT     1046 13972.622  15019     7
## 40 -THUNDERSTORM WIND    10971  1271.664  12243     8
## 29         -ICE STORM     3967  5022.114   8989     9
## 35          -WILDFIRE     8497   403.282   8900    10

Results

library(ggplot2)
    plotDamageHuman=data.frame(index=rep(1:55,2),EVTYPE=rep(sdamage.human$EVTYPE,2),y=c(sdamage.human$Fatalities,sdamage.human$Total), case=rep(c('Falalties','Total'),each=55))
    qplot(index, y, data=plotDamageHuman,log='y',geom=c('point','line'),color=case,ylab='number of human')

plot of chunk unnamed-chunk-5

    plotDamageProperty=data.frame(index=rep(1:55,3),EVTYPE=rep(sdamage.property$EVTYPE,3),y=c(sdamage.property$Property,sdamage.property$Crop,sdamage.property$Total), case=rep(c('Property','Crop','Total'),each=55))
    qplot(index, y, data=plotDamageProperty,log='y',geom=c('point','line'),color=case,ylab='damage (mUSD)')

plot of chunk unnamed-chunk-6

Reference:

[1] Storm Data FAQ https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf

[2] Storm Data Documentation: https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf

[3] State Code from Wikipedia: http://en.wikipedia.org/wiki/List_of_U.S._state_abbreviations