The data for this assignment can be downloaded from the course web site: Storm Data.
This analysis report aims to answer the following 2 questions:
During the data processing, the following steps are conducted:
Load packages, set the system time locale to English, then download and read the data.
library(dplyr)
library(ggplot2)
# Use an English LC_TIME locale so date parsing/formatting does not depend on
# the language of the machine running the report.
Sys.setlocale("LC_TIME", "en_US.UTF-8")

# Work relative to the current working directory. The previous setwd() call
# pointed at an absolute path that exists on one machine only, so the script
# stopped with an error everywhere else.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "repdata_data_StormData.csv"

# Download only when the file is not already present, so re-running the
# analysis does not fetch the archive again. mode = "wb" writes the
# bzip2-compressed bytes unchanged on every platform, and the default download
# method avoids requiring an external curl binary.
if (!file.exists(storm_file)) {
  download.file(url, storm_file, mode = "wb")
}

# The downloaded file is bzip2-compressed despite its ".csv" name; read.csv()
# opens it through file(), which detects the compression when reading.
data <- read.csv(storm_file, header = TRUE)
Subset relevant columns for later analysis.
Extract the month from BGN_DATE.
# Keep only the columns needed for the health and economic analyses.
data <- data[, c("BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
                 "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Parse BGN_DATE as "month/day/year hour:minute:second" and store the
# two-digit month as a new character column.
data$month <- format(strptime(data$BGN_DATE, "%m/%d/%Y %H:%M:%S"), "%m")
Convert PROPDMGEXP and CROPDMGEXP to numeric multipliers in a uniform way and calculate economic damage (PROPDMG * PROPDMGEXP multiplier + CROPDMG * CROPDMGEXP multiplier).
# Multiplier represented by each damage-exponent code.
#   "1".."8"          -> 10^1 .. 10^8
#   "0"               -> 1
#   H/h, K/k, M/m, B  -> hundred, thousand, million, billion
#   "-", "+", "?"     -> 0 (the damage amount is not counted)
# An empty code is also treated as 0; it is handled in the helper below
# because an empty string cannot be used as a lookup name.
exponent_multiplier <- c(
  "0" = 1, "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
  "H" = 1e2, "h" = 1e2, "K" = 1e3, "k" = 1e3,
  "M" = 1e6, "m" = 1e6, "B" = 1e9,
  "-" = 0, "+" = 0, "?" = 0
)

# Translate a vector of exponent codes into numeric multipliers.
# A single table lookup replaces the previous chain of gsub() calls, whose
# result depended on the order of the substitutions and on how R prints large
# numbers (e.g. 1000000 became the text "1e+06" before being parsed back).
# Codes missing from the table yield NA.
exponent_to_multiplier <- function(code) {
  code <- as.character(code)
  multiplier <- unname(exponent_multiplier[code])
  multiplier[!is.na(code) & code == ""] <- 0
  multiplier
}

# The same table is applied to both columns so property and crop exponents
# are interpreted consistently.
data$PROPDMGEXP_number <- exponent_to_multiplier(data$PROPDMGEXP)
data$CROPDMGEXP_number <- exponent_to_multiplier(data$CROPDMGEXP)

# Total economic damage = property damage + crop damage, each scaled by its
# multiplier.
data$economic_damage <- data$PROPDMG * data$PROPDMGEXP_number +
  data$CROPDMG * data$CROPDMGEXP_number
Standardize and merge EVTYPE values (upper/lower case, misspellings, extra spaces or other stray characters, synonymous phrases, etc.).
# Build a cleaned event_type column from EVTYPE.
# First upper-case the raw label and trim surrounding whitespace.
data <- mutate(data,event_type=trimws(toupper(EVTYPE)))
# Then apply a long sequence of regular-expression substitutions. Each gsub()
# receives the output of the previous one, so the order of the steps matters.
# Step 1: drop a trailing ".", "/", ";" or "-".
data$event_type <- gsub("(\\.|/|;|-)$","",data$event_type) %>%
# Step 2: correct misspellings and expand abbreviations.
gsub("AVALANCE","AVALANCHE",.) %>%
gsub("COASTALSTORM","COASTAL STORM",.) %>%
gsub("CHIL$","CHILL",.) %>%
gsub("DEVEL","DEVIL",.) %>%
gsub("DUSTSTORM","DUST STORM",.) %>%
gsub("EROSIN","EROSION",.) %>%
gsub("(FLOODING|FLOOODING|FLDG|FLD|(FLOODIN$))","FLOOD",.) %>%
gsub("HAIL STORM","HAILSTORM",.) %>%
gsub("HVY","HEAVY",.) %>%
gsub("(LIGHTING|LIGNTNING|LIGHTNING WAUSEON)","LIGHTNING",.) %>%
gsub("(MICOBURST|MIRCOBURST)","MICROBURST",.) %>%
gsub("MUD SLIDE","MUDSLIDE",.) %>%
gsub("(PRECIPATATION|(PRECIP$))","PRECIPITATION",.) %>%
gsub("SML","SMALL",.) %>%
gsub("STRM","STREAM",.) %>%
gsub("(STROM|TSORM)","STORM",.) %>%
gsub("(THUNDERTROM|THUNDESTORM|THUNERSTORM|TUNDERSTORM|THUNDERTORM|THUNDEERSTORM|THUNDERESTORM|THUDERSTORM)","THUNDERSTORM",.) %>%
gsub("(TORNADOES|TORNDAO)","TORNADO",.) %>%
gsub("WAYTER","WATER",.) %>%
gsub("WATER SPOUT","WATERSPOUT",.) %>%
gsub("WETNE","WET",.) %>%
gsub("(WINDCHILL|WINDCHILL TEMPERATURE)","WIND CHILL",.) %>%
gsub("(WINTRY|WINTERY)","WINTER",.) %>%
gsub("(WND|W IND|WINS|WINDS|WINDS G|((WI|WIN)$))","WIND",.) %>%
gsub("WINDHAIL","WIND/HAIL",.) %>%
# Step 3: merge synonymous phrases and give combined event names a
# consistent wording and order.
gsub("ACCUMULATED SNOWFALL","SNOW ACCUMULATION",.) %>%
gsub("ASHFALL","ASH",.) %>%
gsub("(BEACH|CSTL)","COASTAL",.) %>%
gsub("COASTA(L|L |L )FLOO(DING|D)","COASTAL FLOOD",.) %>%
gsub("DAM FAILURE","DAM BREAK",.) %>%
gsub("DOWNBURST WIND","DOWNBURST",.) %>%
gsub("(DRYNESS|DRY (PATTERN|WEATHER))","DRY",.) %>%
gsub("(EXCESSIVE HEAT/DROUGHT|(HEAT( |/| WAVE )DROUGHT))","DROUGHT/EXCESSIVE HEAT",.) %>%
gsub("EXCESSIVE PRECIPITATION","HEAVY PRECIPITATION",.) %>%
gsub("EXCESSIVE RAIN","HEAVY RAIN",.) %>%
gsub("EXCESSIVE SNOW","HEAVY SNOW",.) %>%
gsub("(EXTENDED|PROLONGED)","PROLONG",.) %>%
gsub("EXTREME WIND CHILL/BLOWING SNO","BLOWING SNOW/EXTREME WIND CHILL",.) %>%
gsub("FLASH FLOOD LANDSLIDE","FLASH FLOOD/LANDSLIDE",.) %>%
gsub("FLOOD(/| & )HEAVY RAIN","HEAVY RAIN/FLOOD",.) %>%
gsub("^FUNNEL(|S)$","FUNNEL CLOUD",.) %>%
gsub("GLAZE ICE","GLAZE",.) %>%
gsub("HEAVY RAIN EFFECT","HEAVY RAIN",.) %>%
gsub("HEAVY RAIN/LIGHTNING","LIGHTNING/HEAVY RAIN",.) %>%
gsub("HEAVY SWELL","HEAVY SURF",.) %>%
gsub("HEAVY SNOW ANDBLOWING SNOW","HEAVY SNOW/BLOWING SNOW",.) %>%
gsub("HEAVY SNOW/BLIZZARD","BLIZZARD/HEAVY SNOW",.) %>%
gsub("HIGH WIND(/|/ )BLIZZARD","BLIZZARD/HIGH WIND",.) %>%
gsub("HIGH WIND DUST STORM","DUST STORM/HIGH WIND",.) %>%
gsub("HIGH WIND HEAVY RAIN","HIGH WIND/HEAVY RAIN",.) %>%
gsub("HIGH WIND(/| AND )HEAVY SNOW","HEAVY SNOW/HIGH WIND",.) %>%
gsub("^HYPOTHERMIA$","HYPOTHERMIA/EXPOSURE",.) %>%
gsub("ICE(/| AND )SNOW","SNOW/ICE",.) %>%
gsub("ICE JAM FLOOD \\(MINOR","ICE JAM FLOOD",.) %>%
gsub("ICE( ON | )ROAD","ICY ROAD",.) %>%
gsub("ICE STORM AND SNOW","SNOW/ICE STORM",.) %>%
gsub("LAKE EFFECT","LAKE-EFFECT",.) %>%
gsub("LAKE FLOOD","LAKESHORE FLOOD",.) %>%
gsub("LANDSLUMP","LANDSLIDE",.) %>%
gsub("((LATE(-| )SEASON)|UNUSUALLY LATE)","LATE",.) %>%
gsub("(LARGE|ROTATING) WALL CLOUD","WALL CLOUD",.) %>%
gsub("LIGHTNING( | AND )THUNDERSTORM WIND","THUNDERSTORM WIND/LIGHTNING",.) %>%
gsub("MICROBURST WIND","MICROBURST",.) %>%
gsub("MISHAP","ACCIDENT",.) %>%
gsub("NON-","NON ",.) %>%
gsub("RAINFALL","RAIN",.) %>%
gsub("RAIN \\(HEAVY\\)","HEAVY RAIN",.) %>%
gsub("RECORD/EXCESSIVE HEAT","RECORD HEAT",.) %>%
gsub("RECORD HIGH TEMPERATURE","HIGH TEMPERATURE RECORD",.) %>%
gsub("RECORD TEMPERATURE","TEMPERATURE RECORD",.) %>%
gsub("RECORD WARM TEMP","RECORD WARM",.) %>%
gsub("RIP CURRENTS( |/)","RIP CURRENT/",.) %>%
gsub("SEVERE COLD","EXCESSIVE COLD",.) %>%
gsub("^SMALL STREAM$","SMALL STREAM FLOOD",.) %>%
gsub("(SNOW/ BITTER COLD|COLD AND SNOW)","SNOW/COLD",.) %>%
gsub("SNOWFALL RECORD","RECORD SNOW",.) %>%
gsub("(SNOWFALL|SNOW SHOWER)","SNOW",.) %>%
gsub("SNOW( |/)FREEZING RAIN","FREEZING RAIN/SNOW",.) %>%
gsub("SNOW/HIGH WIND","HIGH WIND/SNOW",.) %>%
gsub("(SNOW/RAIN/SLEET|SLEET/RAIN/SNOW)","SNOW/SLEET/RAIN",.) %>%
gsub("SNOW/RAIN","RAIN/SNOW",.) %>%
gsub("SNOW(-|/|)SQUALL","SNOW SQUALL",.) %>%
gsub("(SNOW SLEET|SLEET/SNOW)","SNOW/SLEET",.) %>%
gsub("THUNDERSTORM DAMAGE","THUNDERSTORM WIND DAMAGE",.) %>%
gsub("THUNDERSTORM HAIL","THUNDERSTORM WIND/HAIL",.) %>%
gsub("THUNDERSTORM WIND( |/)FUNNEL CLOU","THUNDERSTORM WIND/FUNNEL CLOUD",.) %>%
gsub("TSTM HEAVY RAIN","THUNDERSTORM WIND/HEAVY RAIN",.) %>%
gsub("(UNUSUAL|ABNORMAL|ABNORMALLY) ","UNUSUALLY ",.) %>%
gsub("UNSEASONA(BLE|L)","UNSEASONABLY",.) %>%
gsub("URBAN FLOOD LANDSLIDE","LANDSLIDE/URBAN FLOOD",.) %>%
gsub("VERY DRY","UNUSUALLY DRY",.) %>%
gsub("VERY WARM","UNSEASONABLY WARM",.) %>%
gsub("WARMTH","WARM",.) %>%
gsub("WIND CHILL/HIGH WIND","HIGH WIND/WIND CHILL",.) %>%
gsub("WIND GUST","GUSTY WIND",.) %>%
gsub("^WIND/HAIL$","HAIL/WIND",.) %>%
gsub("(WINTER MIX|WINTER WEATHER( |/)MIX)","WINTER WEATHER",.) %>%
gsub("WINTER STORM HIGH WIND","WINTER STORM/HIGH WIND",.) %>%
# Step 4: remove qualifiers (advisory wording, "CONDITION"/"SPELL", numeric
# suffixes, hurricane names, tornado F-scale) and collapse remaining variants
# into one label per event family.
gsub(" ADVISOR(Y|IE)","",.) %>%
gsub("(^(BITTER|COLD) WIND CHILL.*)|(^COLD/WIND$)","COLD/WIND CHILL",.) %>%
gsub("BLIZZARD (SUMMARY|WEATHER)","BLIZZARD",.) %>%
gsub("BLOWING SNOW(- EXTREME WIND CHI| & EXTREME WIND CH)","BLOWING SNOW/EXTREME WIND CHILL",.) %>%
gsub("(BRUSH|WILD/FOREST|WILD|FOREST|GRASS) FIRE","WILDFIRE",.) %>%
gsub("(COASTAL EROSION/COASTAL FLOOD|EROSION/COASTAL FLOOD)","COASTAL FLOOD/EROSION",.) %>%
gsub("COLD (TEMPERATURE|WAVE|WEATHER)","COLD",.) %>%
gsub(" (CONDITION|SPELL)","",.) %>%
gsub("(DRY HOT WEATHER|WARM DRY)","HOT/DRY",.) %>%
gsub("DRY MICROBURST.+[0-9]+.*","DRY MICROBURST",.) %>%
gsub("^(FLASH FLOOD/FLOOD|FLOOD FLOOD/FLASH|FLOOD/FLASH/FLOOD|FLOOD/FLASHFLOOD|FLASH FLOOD/ FLOOD)$","FLOOD/FLASH FLOOD",.) %>%
gsub("^(FLOOD( |/)FLASH)$","FLASH FLOOD",.) %>%
gsub("(FREEZING RAIN SLEET AND|(SLEET(/| & )FREEZING RAIN))","FREEZING RAIN/SLEET",.) %>%
gsub("HAIL.+[0-9]+.*","HAIL",.) %>%
gsub("(HEAT( WAVE|BURST)|HOT (PATTERN|WEATHER)|(^HOT$)|WARM WEATHER)","HEAT",.) %>%
gsub("HIGH WIND.+[0-9]+.*","HIGH WIND",.) %>%
gsub("HIGH (SEA|SWELL| SWELL|TIDE|WAVE)","HIGH SURF",.) %>%
gsub("(HURRICANE( EDOUARD| EMILY| ERIN| FELIX| GORDON| OPAL|/TYPHOON)|TYPHOON)","HURRICANE",.) %>%
gsub("PATCHY ","",.) %>%
gsub("(ROGUE WAVE|ROUGH SEA|ROUGH SURF|HEAVY SEA)","HEAVY SURF",.) %>%
gsub("(TSTMW|TSTM WIND.+[0-9]+.*|THUNDERSTORM.+[0-9]+.*|THUNDERSTORMWIND|(^THUNDERSTORMW$)|THUNDERSTORMW 50|THUNDERSTORM WIND LE CEN)","THUNDERSTORM WIND",.) %>%
gsub("(THUNDERSTORM(S|W)|TSTM)","THUNDERSTORM",.) %>%
gsub("(THUNDERSTORM|THUNDERSTORMS|TSTM)$","THUNDERSTORM WIND",.) %>%
gsub("TORNADO F[0-9]","TORNADO",.) %>%
gsub("(TORNADO/WATERSPOUT|(WATERSPOUT(-| )TORNADO))","WATERSPOUT/TORNADO",.) %>%
gsub("TROPICAL STORM .+","TROPICAL STORM",.) %>%
gsub("^((URBAN( |/| AND )SMALL)|URBAN/SMALL FLOOD|(URBAN(/| AND )SMALL STREAM)|URBAN/STREET FLOOD|URBAN SMALL STREAM FLOOD|(SMALL STREAM( |/| AND )URBAN FLOOD)|SMALL STREAM AND)$","URBAN/SMALL STREAM FLOOD",.) %>%
# Step 5: labels that start with "SUMMARY", end with "YEAR" or contain
# "MONTH" are replaced by "-".
gsub("((^SUMMARY.+)|(.+YEAR$)|(.*MONTH.*))","-",.) %>%
# Step 6: final tidy-up. Collapse repeated spaces, strip a trailing " TO",
# trailing "S" characters or a trailing " AND", and turn the remaining
# separators ("S/", "/" followed by spaces, a backslash, " AND ", " & ",
# " - ") into a single "/".
gsub(" +"," ",.) %>%
gsub("( TO|S+| AND)$","",.) %>%
gsub("(S/|/ +|\\\\| (AND|&|-) )","/",.) %>%
# Step 7: put THUNDERSTORM WIND first in flash-flood combinations and use "/"
# between THUNDERSTORM WIND and whatever follows it.
gsub("FLASH FLOOD/THUNDERSTORM WIND","THUNDERSTORM WIND/FLASH FLOOD",.) %>%
gsub("THUNDERSTORM WIND ","THUNDERSTORM WIND/",.)
Group the data by month and event types.
# Grouped view of the data: one group per (month, cleaned event type) pair,
# used by the per-month summaries that follow.
data_month_event <- data %>%
  group_by(month, event_type)
Calculate fatalities by event type and month, then sort the event types within each month by fatalities in descending order. The top 5 event types of each month keep their names; all other event types are labelled “OTHERS”. Arrange the top event types in the desired order for consistent coloring in the “Results” section.
# Total fatalities for every (month, event type) pair.
Fatality_by_eventtype_month <- as.data.frame(
  summarize(data_month_event, Fatality = sum(FATALITIES))
)

# Sort by month, and within each month by fatalities in descending order.
Fatality_by_eventtype_month <- Fatality_by_eventtype_month[
  order(Fatality_by_eventtype_month$month,
        -Fatality_by_eventtype_month$Fatality),
]
row.names(Fatality_by_eventtype_month) <- NULL

# Position of each row within its month after sorting (1 = most fatalities).
fatality_rank <- ave(
  seq_len(nrow(Fatality_by_eventtype_month)),
  Fatality_by_eventtype_month$month,
  FUN = seq_along
)

# Keep the names of the top 5 event types per month and relabel the rest.
# This vectorised assignment replaces a loop over months 1:12 that used a
# nested data-frame replacement, which raised an error whenever a month had
# five or fewer event types (nothing left to relabel).
Fatality_by_eventtype_month$event_type[fatality_rank > 5] <- "OTHERS"

# Fix the legend/stacking order used by the plot. Any event type that is not
# listed here becomes NA.
Fatality_by_eventtype_month$event_type <- factor(
  Fatality_by_eventtype_month$event_type,
  levels = c(
    "AVALANCHE", "WINTER STORM", "EXTREME COLD", "COLD/WIND CHILL",
    "EXTREME COLD/WIND CHILL", "HEAT", "EXCESSIVE HEAT", "WILDFIRE",
    "LIGHTNING", "THUNDERSTORM WIND", "FLOOD", "FLASH FLOOD", "RIP CURRENT",
    "HURRICANE", "TORNADO", "HIGH WIND", "OTHERS"
  )
)
Calculate injuries by event type and month, then sort the event types within each month by injuries in descending order. The top 5 event types of each month keep their names; all other event types are labelled “OTHERS”. Arrange the top event types in the desired order for consistent coloring in the “Results” section.
# Total injuries for every (month, event type) pair.
Injury_by_eventtype_month <- as.data.frame(
  summarize(data_month_event, Injury = sum(INJURIES))
)

# Sort by month, and within each month by injuries in descending order.
Injury_by_eventtype_month <- Injury_by_eventtype_month[
  order(Injury_by_eventtype_month$month,
        -Injury_by_eventtype_month$Injury),
]
row.names(Injury_by_eventtype_month) <- NULL

# Position of each row within its month after sorting (1 = most injuries).
injury_rank <- ave(
  seq_len(nrow(Injury_by_eventtype_month)),
  Injury_by_eventtype_month$month,
  FUN = seq_along
)

# Keep the names of the top 5 event types per month and relabel the rest.
# This vectorised assignment replaces a loop over months 1:12 that used a
# nested data-frame replacement, which raised an error whenever a month had
# five or fewer event types (nothing left to relabel).
Injury_by_eventtype_month$event_type[injury_rank > 5] <- "OTHERS"

# Fix the legend/stacking order used by the plot. Any event type that is not
# listed here becomes NA.
Injury_by_eventtype_month$event_type <- factor(
  Injury_by_eventtype_month$event_type,
  levels = c(
    "HEAVY SNOW", "BLIZZARD", "WINTER STORM", "ICE STORM", "WINTER WEATHER",
    "GLAZE", "HEAT", "EXCESSIVE HEAT", "WILDFIRE", "LIGHTNING",
    "THUNDERSTORM WIND", "TROPICAL STORM", "FLOOD", "FLASH FLOOD",
    "HURRICANE", "TORNADO", "HIGH WIND", "FOG", "HAIL", "OTHERS"
  )
)
Calculate economic damage by event type and month, then sort the event types within each month by economic damage in descending order. The top 5 event types of each month keep their names; all other event types are labelled “OTHERS”. Arrange the top event types in the desired order for consistent coloring in the “Results” section.
# Total economic damage for every (month, event type) pair.
EconomicDamage_by_eventtype_month <- as.data.frame(
  summarize(data_month_event, Economic_Damage = sum(economic_damage))
)

# Sort by month, and within each month by economic damage in descending order.
EconomicDamage_by_eventtype_month <- EconomicDamage_by_eventtype_month[
  order(EconomicDamage_by_eventtype_month$month,
        -EconomicDamage_by_eventtype_month$Economic_Damage),
]
row.names(EconomicDamage_by_eventtype_month) <- NULL

# Position of each row within its month after sorting (1 = largest damage).
damage_rank <- ave(
  seq_len(nrow(EconomicDamage_by_eventtype_month)),
  EconomicDamage_by_eventtype_month$month,
  FUN = seq_along
)

# Keep the names of the top 5 event types per month and relabel the rest.
# This vectorised assignment replaces a loop over months 1:12 that used a
# nested data-frame replacement, which raised an error whenever a month had
# five or fewer event types (nothing left to relabel).
EconomicDamage_by_eventtype_month$event_type[damage_rank > 5] <- "OTHERS"

# Fix the legend/stacking order used by the plot. Any event type that is not
# listed here becomes NA.
EconomicDamage_by_eventtype_month$event_type <- factor(
  EconomicDamage_by_eventtype_month$event_type,
  levels = c(
    "FROST/FREEZE", "WINTER STORM", "ICE STORM", "WILDFIRE", "DROUGHT",
    "THUNDERSTORM WIND", "TROPICAL STORM", "FLOOD", "FLASH FLOOD",
    "RIVER FLOOD", "STORM SURGE", "STORM SURGE/TIDE", "HURRICANE", "TORNADO",
    "TORNADO, THUNDERSTORM WIND, HAIL", "HIGH WIND", "HAIL", "OTHERS"
  )
)
Plot 1 for fatalities by month. Top 5 event types of each
month are annotated in detail.
The top event types show different distribution patterns across the year:
for example, tornado-caused fatalities are concentrated in March to June, heat- and
lightning-caused fatalities are concentrated in July to August, and
winter/cold-related fatalities are concentrated in January. Overall,
tornadoes cause the most fatalities.
# Plot 1: stacked bars of fatalities per month, one segment per event type,
# with a fixed colour for every event-type label.
ggplot(
  data = Fatality_by_eventtype_month,
  mapping = aes(x = month, y = Fatality, fill = event_type)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(
    name = "Event Types",
    values = c(
      "AVALANCHE" = "lightblue",
      "WINTER STORM" = "dodgerblue",
      "EXTREME COLD" = "steelblue",
      "COLD/WIND CHILL" = "cornflowerblue",
      "EXTREME COLD/WIND CHILL" = "royalblue",
      "HEAT" = "orange",
      "EXCESSIVE HEAT" = "darkorange2",
      "WILDFIRE" = "red2",
      "LIGHTNING" = "yellow",
      "THUNDERSTORM WIND" = "gold",
      "FLOOD" = "green3",
      "FLASH FLOOD" = "palegreen",
      "RIP CURRENT" = "olivedrab",
      "HURRICANE" = "cyan",
      "TORNADO" = "plum3",
      "HIGH WIND" = "pink",
      "OTHERS" = "gray"
    )
  ) +
  labs(
    title = "Plot 1. Fatalities by different event types in each month",
    x = "Month",
    y = "Fatality numbers"
  ) +
  theme_bw()
Plot 2 for injuries by month. Top 5 event types of each month
are annotated in detail.
The top event types show different distribution patterns across the year:
for example, tornado-caused injuries are concentrated in March to June, though
present in all months; heat- and lightning-caused injuries are concentrated
in July to August; and flood-caused injuries are concentrated in October.
Overall, tornadoes cause the most injuries.
# Plot 2: stacked bars of injuries per month, one segment per event type,
# with a fixed colour for every event-type label.
ggplot(
  data = Injury_by_eventtype_month,
  mapping = aes(x = month, y = Injury, fill = event_type)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(
    name = "Event Types",
    values = c(
      "HEAVY SNOW" = "lightblue",
      "BLIZZARD" = "skyblue",
      "WINTER STORM" = "dodgerblue",
      "ICE STORM" = "cornflowerblue",
      "WINTER WEATHER" = "steelblue",
      "GLAZE" = "mediumblue",
      "HEAT" = "orange",
      "EXCESSIVE HEAT" = "darkorange2",
      "WILDFIRE" = "red2",
      "LIGHTNING" = "yellow",
      "THUNDERSTORM WIND" = "gold",
      "TROPICAL STORM" = "goldenrod",
      "FLOOD" = "green3",
      "FLASH FLOOD" = "palegreen",
      "HURRICANE" = "cyan",
      "TORNADO" = "plum3",
      "HIGH WIND" = "pink",
      "FOG" = "wheat",
      "HAIL" = "beige",
      "OTHERS" = "gray"
    )
  ) +
  labs(
    title = "Plot 2. Injuries by different event types in each month",
    x = "Month",
    y = "Injury numbers"
  ) +
  theme_bw()
Plot 3 for economic damages by month. Top 5 event types of
each month are annotated in detail.
The top event types show different distribution patterns across the year:
for example, flood- and storm surge-caused economic damage is concentrated in
January and August, hurricane-caused economic damage is concentrated in
August to October, and tornado-caused economic damage is concentrated in March
to June. Overall, floods cause the greatest economic
consequences.
# Plot 3: stacked bars of economic damage per month, one segment per event
# type, with a fixed colour for every event-type label.
ggplot(
  data = EconomicDamage_by_eventtype_month,
  mapping = aes(x = month, y = Economic_Damage, fill = event_type)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(
    name = "Event Types",
    values = c(
      "FROST/FREEZE" = "lightblue",
      "WINTER STORM" = "dodgerblue",
      "ICE STORM" = "cornflowerblue",
      "WILDFIRE" = "red2",
      "DROUGHT" = "orange",
      "THUNDERSTORM WIND" = "gold",
      "TROPICAL STORM" = "goldenrod",
      "FLOOD" = "green3",
      "FLASH FLOOD" = "palegreen",
      "RIVER FLOOD" = "yellowgreen",
      "STORM SURGE" = "seagreen",
      "STORM SURGE/TIDE" = "darkgreen",
      "HURRICANE" = "cyan",
      "TORNADO" = "plum3",
      "TORNADO, THUNDERSTORM WIND, HAIL" = "violet",
      "HIGH WIND" = "pink",
      "HAIL" = "beige",
      "OTHERS" = "gray"
    )
  ) +
  labs(
    title = "Plot 3. Economic damage by different event types in each month",
    x = "Month",
    y = "Economic damage values"
  ) +
  theme_bw()