# Download the storm data set once and cache it in the working directory.
# The remote file is bzip2-compressed; read.csv() opens it through a file
# connection, which detects the compression when reading, so the cached copy
# can be read directly.
Url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
File <- "StormData.csv"
if (!file.exists(File)) {
  # Fetch into a scratch file and only rename it on success, so that an
  # interrupted download is never mistaken for a complete cached copy by the
  # file.exists() check on a later run.
  partial <- paste0(File, ".part")
  ok <- tryCatch(
    download.file(Url, partial, mode = "wb") == 0,
    error = function(e) {
      unlink(partial)
      stop(e)
    }
  )
  if (!ok || !file.rename(partial, File)) {
    unlink(partial)
    stop("could not download ", Url, call. = FALSE)
  }
}
# Read from the same path that was checked/downloaded above.
SD <- read.csv(File)
str(SD)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436781 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Total fatalities per event type, then the 20 event types with the most.
# The ranking uses base R only: no library(dplyr) call is visible in this
# script, so the pipe/arrange() version fails unless dplyr is attached
# elsewhere.
totFatalities <- aggregate(SD$FATALITIES, by = list(SD$EVTYPE), FUN = sum)
names(totFatalities) <- c("Event", "Fatalities")
# order(-x) sorts descending and leaves tied event types in the order
# aggregate() produced them.
totFatalitiesSorted <- head(
  totFatalities[order(-totFatalities$Fatalities), ],
  20
)
# Renumber the rows 1..20 so the printed table shows ranks.
rownames(totFatalitiesSorted) <- NULL
totFatalitiesSorted
## Event Fatalities
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
## 11 WINTER STORM 206
## 12 RIP CURRENTS 204
## 13 HEAT WAVE 172
## 14 EXTREME COLD 160
## 15 THUNDERSTORM WIND 133
## 16 HEAVY SNOW 127
## 17 EXTREME COLD/WIND CHILL 125
## 18 STRONG WIND 103
## 19 BLIZZARD 101
## 20 HIGH SURF 101
# Total injuries per event type, then the 20 event types with the most.
# The ranking uses base R only: no library(dplyr) call is visible in this
# script, so the pipe/arrange() version fails unless dplyr is attached
# elsewhere.
totInjuries <- aggregate(SD$INJURIES, by = list(SD$EVTYPE), FUN = sum)
names(totInjuries) <- c("Event", "Injuries")
# order(-x) sorts descending and leaves tied event types in the order
# aggregate() produced them.
totInjuriesSorted <- head(
  totInjuries[order(-totInjuries$Injuries), ],
  20
)
# Renumber the rows 1..20 so the printed table shows ranks.
rownames(totInjuriesSorted) <- NULL
totInjuriesSorted
## Event Injuries
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1777
## 9 THUNDERSTORM WIND 1488
## 10 HAIL 1361
## 11 WINTER STORM 1321
## 12 HURRICANE/TYPHOON 1275
## 13 HIGH WIND 1137
## 14 HEAVY SNOW 1021
## 15 WILDFIRE 911
## 16 THUNDERSTORM WINDS 908
## 17 BLIZZARD 805
## 18 FOG 734
## 19 WILD/FOREST FIRE 545
## 20 DUST STORM 440
# Two bar charts side by side: top-20 event types by fatalities (left) and
# by injuries (right).
# mfrow is set before cex on purpose: changing the layout resets cex.
# las = 3 draws axis labels vertically; the large bottom margin leaves room
# for the event names.
par(
  mfrow = c(1, 2),
  mar = c(10, 4, 2, 2),
  las = 3,
  cex = 0.7,
  cex.main = 1.4,
  cex.lab = 1.2
)
barplot(
  height = totFatalitiesSorted[["Fatalities"]],
  main = "Top 20 Weather Events for Fatalities",
  ylab = "Number of Fatalities",
  col = "blue",
  names.arg = totFatalitiesSorted[["Event"]]
)
barplot(
  height = totInjuriesSorted[["Injuries"]],
  main = "Top 20 Weather Events for Injuries",
  ylab = "Number of Injuries",
  col = "red",
  names.arg = totInjuriesSorted[["Event"]]
)
# Sum of the PROPDMG column per event type, then the 20 largest totals.
# NOTE(review): PROPDMG is summed as-is. The data also has a PROPDMGEXP
# column that this script never uses; it presumably encodes the magnitude of
# each amount (e.g. K/M/B) -- confirm whether values must be scaled by it
# before summing.
# The ranking uses base R only: no library(dplyr) call is visible in this
# script, so the pipe/arrange() version fails unless dplyr is attached
# elsewhere.
totProperty <- aggregate(SD$PROPDMG, by = list(SD$EVTYPE), FUN = sum)
names(totProperty) <- c("Event", "Property")
# order(-x) sorts descending and leaves tied event types in the order
# aggregate() produced them.
totPropertySorted <- head(
  totProperty[order(-totProperty$Property), ],
  20
)
# Renumber the rows 1..20 so the printed table shows ranks.
rownames(totPropertySorted) <- NULL
totPropertySorted
## Event Property
## 1 TORNADO 3212258.16
## 2 FLASH FLOOD 1420124.59
## 3 TSTM WIND 1335965.61
## 4 FLOOD 899938.48
## 5 THUNDERSTORM WIND 876844.17
## 6 HAIL 688693.38
## 7 LIGHTNING 603351.78
## 8 THUNDERSTORM WINDS 446293.18
## 9 HIGH WIND 324731.56
## 10 WINTER STORM 132720.59
## 11 HEAVY SNOW 122251.99
## 12 WILDFIRE 84459.34
## 13 ICE STORM 66000.67
## 14 STRONG WIND 62993.81
## 15 HIGH WINDS 55625.00
## 16 HEAVY RAIN 50842.14
## 17 TROPICAL STORM 48423.68
## 18 WILD/FOREST FIRE 39344.95
## 19 FLASH FLOODING 28497.15
## 20 URBAN/SML STREAM FLD 26051.94
# Sum of the CROPDMG column per event type, then the 20 largest totals.
# NOTE(review): CROPDMG is summed as-is. The data also has a CROPDMGEXP
# column that this script never uses; it presumably encodes the magnitude of
# each amount -- confirm whether values must be scaled by it before summing.
# The ranking uses base R only: no library(dplyr) call is visible in this
# script, so the pipe/arrange() version fails unless dplyr is attached
# elsewhere.
totCrop <- aggregate(SD$CROPDMG, by = list(SD$EVTYPE), FUN = sum)
names(totCrop) <- c("Event", "Crop")
# order(-x) sorts descending and leaves tied event types in the order
# aggregate() produced them.
totCropSorted <- head(totCrop[order(-totCrop$Crop), ], 20)
# Renumber the rows 1..20 so the printed table shows ranks.
rownames(totCropSorted) <- NULL
totCropSorted
## Event Crop
## 1 HAIL 579596.28
## 2 FLASH FLOOD 179200.46
## 3 FLOOD 168037.88
## 4 TSTM WIND 109202.60
## 5 TORNADO 100018.52
## 6 THUNDERSTORM WIND 66791.45
## 7 DROUGHT 33898.62
## 8 THUNDERSTORM WINDS 18684.93
## 9 HIGH WIND 17283.21
## 10 HEAVY RAIN 11122.80
## 11 FROST/FREEZE 7034.14
## 12 EXTREME COLD 6121.14
## 13 TROPICAL STORM 5899.12
## 14 HURRICANE 5339.31
## 15 FLASH FLOODING 5126.05
## 16 HURRICANE/TYPHOON 4798.48
## 17 WILDFIRE 4364.20
## 18 TSTM WIND/HAIL 4356.65
## 19 WILD/FOREST FIRE 4189.54
## 20 LIGHTNING 3580.61
# Two bar charts side by side: top-20 event types by summed property damage
# (left) and by summed crop damage (right). Both panels use the same y-axis
# limits, so bar heights are directly comparable across the two charts.
# mfrow is set before cex on purpose: changing the layout resets cex.
par(
  mfrow = c(1, 2),
  mar = c(10, 4, 2, 2),
  las = 3,
  cex = 0.7,
  cex.main = 1.4,
  cex.lab = 1.2
)
barplot(
  height = totPropertySorted[["Property"]],
  main = "Top 20 Weather Events for Property Damage ",
  ylab = "Amount of Property Damage",
  ylim = c(0, 3500000),
  col = "Brown",
  names.arg = totPropertySorted[["Event"]]
)
barplot(
  height = totCropSorted[["Crop"]],
  main = "Top 20 Weather Events for Crop Damage",
  ylab = "Amount of Crop Damage",
  ylim = c(0, 3500000),
  col = "Green",
  names.arg = totCropSorted[["Event"]]
)
# Per-row CROPDMG + PROPDMG, summed per event type, then the 20 largest
# totals.
# NOTE(review): both columns are added as-is; the CROPDMGEXP / PROPDMGEXP
# columns are not applied -- confirm whether the amounts share a unit.
# The ranking uses base R only: no library(dplyr) call is visible in this
# script, so the pipe/arrange() version fails unless dplyr is attached
# elsewhere.
totTotalCost <- aggregate(
  SD$CROPDMG + SD$PROPDMG,
  by = list(SD$EVTYPE),
  FUN = sum
)
names(totTotalCost) <- c("Event", "TotalCost")
# order(-x) sorts descending and leaves tied event types in the order
# aggregate() produced them.
totTotalCostSorted <- head(
  totTotalCost[order(-totTotalCost$TotalCost), ],
  20
)
# Renumber the rows 1..20 so the printed table shows ranks.
rownames(totTotalCostSorted) <- NULL
totTotalCostSorted
## Event TotalCost
## 1 TORNADO 3312276.68
## 2 FLASH FLOOD 1599325.05
## 3 TSTM WIND 1445168.21
## 4 HAIL 1268289.66
## 5 FLOOD 1067976.36
## 6 THUNDERSTORM WIND 943635.62
## 7 LIGHTNING 606932.39
## 8 THUNDERSTORM WINDS 464978.11
## 9 HIGH WIND 342014.77
## 10 WINTER STORM 134699.58
## 11 HEAVY SNOW 124417.71
## 12 WILDFIRE 88823.54
## 13 ICE STORM 67689.62
## 14 STRONG WIND 64610.71
## 15 HEAVY RAIN 61964.94
## 16 HIGH WINDS 57384.60
## 17 TROPICAL STORM 54322.80
## 18 WILD/FOREST FIRE 43534.49
## 19 DROUGHT 37997.67
## 20 FLASH FLOODING 33623.20
# Single bar chart of the top-20 event types by combined crop + property
# damage totals.
# mfrow is set before cex on purpose: changing the layout resets cex.
par(
  mfrow = c(1, 1),
  mar = c(10, 4, 2, 2),
  las = 3,
  cex = 0.7,
  cex.main = 1.4,
  cex.lab = 1.2
)
barplot(
  height = totTotalCostSorted[["TotalCost"]],
  main = "Top 20 Weather Events for total Damage ",
  ylab = "Amount of total Damage",
  ylim = c(0, 3500000),
  col = "Black",
  names.arg = totTotalCostSorted[["Event"]]
)