options(scipen=999)
options(warn=-1)
library(ggplot2)
library(gridExtra)
## Loading required package: grid
Dat <- read.csv(bzfile("repdata-Data-StormData.csv.bz2"))
str(Dat)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436781 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
Sub <- Dat[Dat$FATALITIES != 0,]
Sub <- Sub[Sub$INJURIES != 0,]
summary(Sub$FATALITIES)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 1.000 2.862 2.000 158.000
summary(Sub$INJURIES)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 2.00 5.00 29.82 20.00 1700.00
hifatal <- subset(Sub,Sub$FATALITIES >= (quantile(Sub$FATALITIES,.9)))
hiinjury <- subset(Sub,Sub$INJURIES >= (quantile(Sub$INJURIES,.9)))
library(plyr)
fatalsum <- ddply(hifatal,.(EVTYPE),summarise,total=sum(FATALITIES),mean=mean(FATALITIES))
fatalsum <- arrange(fatalsum,desc(total))
injurysum <- ddply(hiinjury,.(EVTYPE),summarise,total=sum(INJURIES),mean=mean(INJURIES))
injurysum <- arrange(injurysum,desc(total))
head(fatalsum,7)
## EVTYPE total mean
## 1 TORNADO 3429 12.89098
## 2 EXCESSIVE HEAT 314 20.93333
## 3 FLASH FLOOD 66 11.00000
## 4 HEAT 54 10.80000
## 5 FLOOD 42 8.40000
## 6 WILDFIRE 34 11.33333
## 7 TSUNAMI 32 32.00000
head(injurysum,7)
## EVTYPE total mean
## 1 TORNADO 41556 196.9479
## 2 EXCESSIVE HEAT 4011 174.3913
## 3 FLOOD 2492 498.4000
## 4 ICE STORM 1568 1568.0000
## 5 HEAT 1320 165.0000
## 6 HURRICANE/TYPHOON 1200 400.0000
## 7 BLIZZARD 535 267.5000
Dat$PROPDMGEXP <- gsub("K","1000",Dat$PROPDMGEXP)
Dat$PROPDMGEXP <- gsub("M","10000",Dat$PROPDMGEXP)
Dat$PROPDMGEXP <- gsub("B","1000000000",Dat$PROPDMGEXP)
Dat$PROPDMGEXP[Dat$PROPDMGEXP ==""] <- "1"
Dat$PROPDMGEXP <- as.numeric(Dat$PROPDMGEXP)
Dat$CROPDMGEXP <- gsub("K","1000",Dat$CROPDMGEXP)
Dat$CROPDMGEXP <- gsub("M","1000000",Dat$CROPDMGEXP)
Dat$CROPDMGEXP <- gsub("B","1000000000",Dat$CROPDMGEXP)
Dat$CROPDMGEXP[Dat$CROPDMGEXP ==""] <- "1"
Dat$CROPDMGEXP <- as.numeric(Dat$CROPDMGEXP)
Dat$TOTALPROP <- (Dat$PROPDMGEXP*Dat$PROPDMG)
Dat$TOTALCROP <- (Dat$CROPDMGEXP*Dat$CROPDMG)
summary(Dat$TOTALPROP)
## Min. 1st Qu. Median Mean 3rd Qu.
## 0 0 0 319200 500
## Max. NA's
## 115000000000 28
summary(Dat$TOTALCROP)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 54410 0 5000000000
## NA's
## 29
hiprop <- subset(Dat,Dat$TOTALPROP >= (quantile(Dat$TOTALPROP,.99,na.rm=TRUE)))
hicrop <- subset(Dat,Dat$TOTALCROP >= (quantile(Dat$TOTALCROP,.99,na.rm=TRUE)))
propsum <- ddply(hiprop,.(EVTYPE),summarise,total=sum(TOTALPROP),mean=mean(TOTALPROP))
propsum <- arrange(propsum,desc(total))
cropsum <- ddply(hicrop,.(EVTYPE),summarise,total=sum(TOTALCROP),mean=mean(TOTALPROP))
cropsum <- arrange(cropsum,desc(total))
fatalplot1 <- ggplot(fatalsum[1:7,],aes(x=EVTYPE,y=mean,width=.7))+
geom_bar(stat="identity",colour="red",fill="red")+
xlab("Event Type")+
ylab("Mean Fatalities per Event")+
theme(axis.text.x = element_text(angle=-90, hjust=0.5, vjust=0.5,size=10,colour="black"))+
theme(axis.title.y = element_text(size=14,colour="black"))+
theme(axis.title.x = element_text(size=14,colour="black"))+
scale_x_discrete(limits=c("TORNADO","EXCESSIVE HEAT","FLASH FLOOD","HEAT","FLOOD","WILDFIRE","TSUNAMI"))
fatalplot2 <- ggplot(fatalsum[1:7,],aes(x=EVTYPE,y=total,width=.7))+
geom_bar(stat="identity",colour="red",fill="red")+
xlab("Event Type")+
ylab("Total Fatalities per Event")+
theme(axis.text.x = element_text(angle=-90, hjust=0.5, vjust=0.5,size=10,colour="black"))+
theme(axis.title.y = element_text(size=14,colour="black"))+
theme(axis.title.x = element_text(size=14,colour="black"))+
scale_x_discrete(limits=c("TORNADO","EXCESSIVE HEAT","FLASH FLOOD","HEAT","FLOOD","WILDFIRE","TSUNAMI"))
injuryplot1 <- ggplot(injurysum[1:7,],aes(x=EVTYPE,y=mean,width=.7))+
geom_bar(stat="identity")+
xlab("Event Type")+
ylab("Mean Injuries per Event")+
theme(axis.text.x = element_text(angle=-90, hjust=0.5, vjust=0.5,size=10,colour="black"))+
theme(axis.title.y = element_text(size=14,colour="black"))+
theme(axis.title.x = element_text(size=14,colour="black"))+
scale_x_discrete(limits=c("TORNADO","EXCESSIVE HEAT","FLOOD","ICE STORM","HEAT","HURRICANE/TYPHOON","BLIZZARD"))
injuryplot2 <- ggplot(injurysum[1:7,],aes(x=EVTYPE,y=total,width=.7))+
geom_bar(stat="identity")+
xlab("Event Type")+
ylab("Total Injuries per Event")+
theme(axis.text.x = element_text(angle=-90, hjust=0.5, vjust=0.5,size=10,colour="black"))+
theme(axis.title.y = element_text(size=14,colour="black"))+
theme(axis.title.x = element_text(size=14,colour="black"))+
scale_x_discrete(limits=c("TORNADO","EXCESSIVE HEAT","FLOOD","ICE STORM","HEAT","HURRICANE/TYPHOON","BLIZZARD"))
grid.arrange(fatalplot2,fatalplot1,ncol=2,main="Total and Mean Fatalites for Highest Fatality-Causing Events (Fig. 1)")
grid.arrange(injuryplot2,injuryplot1,ncol=2,main="Total and Mean Injuries for Highest Injury-Causing Events (Fig. 2)")
propsum$type <- as.factor("Property")
cropsum$type <- as.factor("Crop")
alldamage <- rbind(head(propsum,30),head(cropsum,30))
dmgplot1 <- ggplot(alldamage[1:60,],aes(x=EVTYPE,y=total/1000000000,width=.7))+
geom_bar(stat="identity")+
xlab("Event Type")+
ylab("Total Damage (in Billions of Dollars)")+
theme(axis.text.x = element_text(angle=-90, hjust=0.5, vjust=0.5,size=10,colour="black"))+
theme(axis.title.y = element_text(size=10,colour="black"))+
theme(axis.title.x = element_text(size=10,colour="black"))+
facet_grid(type ~.)+
theme(strip.background = element_rect(fill="orange"))+
scale_x_discrete(limits=c("DROUGHT","FLOOD","HURRICANE/TYPHOON","STORM SURGE","TORNADO","HURRICANE","TROPICAL STORM","WINTERSTORM","RIVERFLOOD","RIVER STORM"))
dmgplot2 <- ggplot(alldamage[1:60,],aes(x=EVTYPE,y=mean/1000000000,width=.7))+
geom_bar(stat="identity")+
xlab("Event Type")+
ylab("Mean Damage (in Billions of Dollars)")+
theme(axis.text.x = element_text(angle=-90,size=7.5,colour="black"))+
theme(axis.title.y = element_text(size=10,colour="black"))+
theme(axis.title.x = element_text(size=10,colour="black"))+
facet_grid(type ~.)+
theme(strip.background = element_rect(fill="green"))+
scale_x_discrete(limits=c("HEAVY RAIN/SEVERE WEATHER","HURRICANE/TYPHOON","STORM SURGE","TORNADOES, TSTM WIND, HAIL","SEVERE THUNDERSTORM","HURRICANE OPAL","STORM SURGE/TIDE","RIVER FLOOD","HURRICANE","FLOOD"))
grid.arrange(dmgplot1,dmgplot2,ncol=2,main="Total and Mean Economic Damages for Costliest Events (Fig. 3)")