# Read the storm events table from "stormdata.csv" (resolved against the
# current working directory) and preview its first rows.
data <- read.csv(file = "stormdata.csv")
head(x = data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
library(ggplot2)
library(plyr)
# Count how many entries of `x` are NA.
nmissing <- function(x) {
  missing_flags <- is.na(x)
  sum(missing_flags)
}
# Apply the NA counter to every column of the storm data and print the
# per-column totals.
colwise(.fun = nmissing)(data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE BGN_RANGE
## 1 0 0 0 0 0 0 0 0 0
## BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI
## 1 0 0 0 0 0 902297 0 0
## END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG PROPDMGEXP
## 1 0 0 0 843563 0 0 0 0 0
## CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE LATITUDE_E
## 1 0 0 0 0 0 47 0 40
## LONGITUDE_ REMARKS REFNUM
## 1 0 0 0
# Total casualties (fatalities plus injuries) for each event type.
injuryDataFrame <- ddply(data, .(EVTYPE),
                         summarize,
                         TotalHarm = sum(FATALITIES + INJURIES))
# Rank event types from most to least harmful. `TRUE` is spelled out because
# the shorthand `T` is an ordinary variable that can be reassigned.
injuryDataFrame <- injuryDataFrame[order(injuryDataFrame$TotalHarm,
                                         decreasing = TRUE), ]
# Keep the ten most harmful event types. head() returns at most 10 rows,
# whereas indexing with 1:10 would pad the result with all-NA rows if fewer
# than 10 event types were present.
TopHarm <- head(injuryDataFrame, 10)
# Property damage per event type.
#
# Step 1: add up the raw PROPDMG figures within each
# (event type, exponent code) pair.
prop <- ddply(data, .(EVTYPE, PROPDMGEXP), summarize, PROPDMG = sum(PROPDMG))

# Step 2: scale every subtotal by the multiplier its exponent code maps to.
# Codes are compared case-insensitively: H = 1e2, K = 1e3, M = 1e6, B = 1e9.
# Any other non-missing code leaves the subtotal unscaled; a missing code
# yields NA.
prop$PropertyDamage <- local({
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(prop$PROPDMGEXP)
  scale <- unname(multipliers[code])
  scale[is.na(scale) & !is.na(code)] <- 1
  prop$PROPDMG * scale
})

# Step 3: keep only the event type and the scaled amount, then collapse the
# per-code rows into one total per event type.
prop <- prop[, c("EVTYPE", "PropertyDamage")]
prop.total <- ddply(prop, .(EVTYPE), summarize,
                    TotalPropDamage = sum(PropertyDamage))
# Crop damage per event type.
#
# Step 1: add up the raw CROPDMG figures within each
# (event type, exponent code) pair.
crop <- ddply(data, .(EVTYPE, CROPDMGEXP), summarize, CROPDMG = sum(CROPDMG))

# Step 2: scale every subtotal by the multiplier its exponent code maps to.
# Codes are compared case-insensitively: H = 1e2, K = 1e3, M = 1e6, B = 1e9.
# Any other non-missing code leaves the subtotal unscaled; a missing code
# yields NA.
crop$CropDamage <- local({
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(crop$CROPDMGEXP)
  scale <- unname(multipliers[code])
  scale[is.na(scale) & !is.na(code)] <- 1
  crop$CROPDMG * scale
})

# Step 3: keep only the event type and the scaled amount, then collapse the
# per-code rows into one total per event type.
crop <- crop[, c("EVTYPE", "CropDamage")]
crop.total <- ddply(crop, .(EVTYPE), summarize,
                    TotalCropDamage = sum(CropDamage))
# Join the per-event property and crop totals on EVTYPE. merge() keeps only
# the event types that appear in both tables.
damageDataFrame <- merge(prop.total, crop.total, by = "EVTYPE")
# Total economic damage is the sum of property and crop damage.
damageDataFrame <- mutate(damageDataFrame,
                          TotalDamage = TotalPropDamage + TotalCropDamage)
# Rank event types from most to least costly. `TRUE` is spelled out because
# the shorthand `T` is an ordinary variable that can be reassigned.
damageDataFrame <- damageDataFrame[order(damageDataFrame$TotalDamage,
                                         decreasing = TRUE), ]
# Keep the ten costliest event types. head() returns at most 10 rows,
# whereas indexing with 1:10 would pad the result with all-NA rows if fewer
# than 10 event types were present.
TopDamage <- head(damageDataFrame, 10)
1. Population Health Casualties
The 10 most harmful event types, ranked by total casualties (fatalities plus injuries), are:
TopHarm
## EVTYPE TotalHarm
## 834 TORNADO 96979
## 130 EXCESSIVE HEAT 8428
## 856 TSTM WIND 7461
## 170 FLOOD 7259
## 464 LIGHTNING 6046
## 275 HEAT 3037
## 153 FLASH FLOOD 2755
## 427 ICE STORM 2064
## 760 THUNDERSTORM WIND 1621
## 972 WINTER STORM 1527
# Bar chart of the event types in TopHarm: one bar per event type, with bar
# height given by TotalHarm. The axis label and title describe the plotted
# quantity as fatalities plus injuries, not fatalities alone.
plot1 <- ggplot(TopHarm, aes(EVTYPE, TotalHarm, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  xlab("Top 10 events") +
  ylab("Total harm (fatalities + injuries)") +
  ggtitle("Casualties due to severe weather events in the U.S. from 1950-2011") +
  # Event-type names are long; tilt them so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot1
The ten event types that caused the greatest total economic damage (property plus crop damage) are as follows:
TopDamage
## EVTYPE TotalPropDamage TotalCropDamage TotalDamage
## 170 FLOOD 144657709807 5661968450 150319678257
## 411 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 834 TORNADO 56937160779 414953270 57352114049
## 670 STORM SURGE 43323536000 5000 43323541000
## 244 HAIL 15732267543 3025954473 18758222016
## 153 FLASH FLOOD 16140812067 1421317100 17562129167
## 95 DROUGHT 1046106000 13972566000 15018672000
## 402 HURRICANE 11868319010 2741910000 14610229010
## 590 RIVER FLOOD 5118945500 5029459000 10148404500
## 427 ICE STORM 3944927860 5022113500 8967041360
# Bar chart of total economic damage for the event types in TopDamage:
# one bar per event type, with bar height given by TotalDamage.
plot2 <- ggplot(data = TopDamage,
                mapping = aes(x = EVTYPE, y = TotalDamage, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  labs(x = "Top 10 events",
       y = "Total Economic damage",
       title = "Total Economic damage due to severe weather events in the U.S from 1950-2011") +
  # Tilt the event-type names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot2
This plot shows the crop damage for the same ten event types:
# Bar chart of crop damage for the event types in TopDamage, assembled one
# component at a time: base mapping, bars, labels, then axis-text rotation.
plot3 <- ggplot(data = TopDamage,
                mapping = aes(x = EVTYPE, y = TotalCropDamage, fill = EVTYPE))
plot3 <- plot3 + geom_bar(stat = "identity")
plot3 <- plot3 +
  labs(x = "Top 10 events",
       y = "Total Crop Economic damage",
       title = "Total Economic Crop damage due to severe weather events in the U.S from 1950-2011")
# Tilt the event-type names so the long labels do not overlap.
plot3 <- plot3 + theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot3