library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the raw storm data ----
# Switch to the assignment folder so the relative file name below resolves.
# NOTE(review): this is an absolute path on one specific Windows machine;
# the script will stop here anywhere that folder does not exist.
setwd(dir = "C:\\software\\R\\coursera\\reproducible-research\\peer-assignment-2\\")
# The CSV is bzip2-compressed, so it is read through a bzfile() connection.
stormdata <- utils::read.csv(
  file = bzfile(description = "repdata_data_StormData.csv.bz2")
)
# Print a compact overview of every column and its type.
utils::str(object = stormdata)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436781 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# List the column names of the raw data frame.
names(stormdata)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Wrap the raw data frame as a tibble so it prints compactly.
# as_tibble() (re-exported by dplyr) replaces tbl_df(), which dplyr has
# deprecated.
storm <- as_tibble(stormdata)
Details of the storm data
# Auto-print the tibble to preview its first rows and its columns.
storm
## Source: local data frame [902,297 x 37]
##
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## 7 1 11/16/1951 0:00:00 0100 CST 9 BLOUNT AL
## 8 1 1/22/1952 0:00:00 0900 CST 123 TALLAPOOSA AL
## 9 1 2/13/1952 0:00:00 2000 CST 125 TUSCALOOSA AL
## 10 1 2/13/1952 0:00:00 2000 CST 57 FAYETTE AL
## .. ... ... ... ... ... ... ...
## Variables not shown: EVTYPE (fctr), BGN_RANGE (dbl), BGN_AZI (fctr),
## BGN_LOCATI (fctr), END_DATE (fctr), END_TIME (fctr), COUNTY_END (dbl),
## COUNTYENDN (lgl), END_RANGE (dbl), END_AZI (fctr), END_LOCATI (fctr),
## LENGTH (dbl), WIDTH (dbl), F (int), MAG (dbl), FATALITIES (dbl),
## INJURIES (dbl), PROPDMG (dbl), PROPDMGEXP (fctr), CROPDMG (dbl),
## CROPDMGEXP (fctr), WFO (fctr), STATEOFFIC (fctr), ZONENAMES (fctr),
## LATITUDE (dbl), LONGITUDE (dbl), LATITUDE_E (dbl), LONGITUDE_ (dbl),
## REMARKS (fctr), REFNUM (dbl)
# Injuries by state and event type ----
# Total INJURIES for every (STATE, EVTYPE) pair. Only the INJURIES column is
# summed here.
injury_report <- summarise(
  group_by(storm, STATE, EVTYPE),
  injury = sum(INJURIES)
)

# One row per state: state code, the event type with the highest injury
# total, and that total. Everything is stored as text because the matrix also
# holds the state and event labels.
injury_states <- unique(injury_report$STATE)
injury_top <- matrix(nrow = length(injury_states), ncol = 3)

# Lay the per-state bar charts out on a 3 x 6 grid with tight margins.
par(mfrow = c(3, 6))
par(mar = c(4, 4, 1, 1))

# seq_along() instead of 1:length(): with no states the loop body is skipped
# rather than run for i = 1 and i = 0.
for (i in seq_along(injury_states)) {
  state_rows <- injury_report[injury_report$STATE == injury_states[i], ]
  barplot(
    state_rows$injury,
    names.arg = state_rows$EVTYPE,
    main = injury_states[i]
  )

  state_max <- max(state_rows$injury)
  # On ties, the first event type that reaches the maximum is reported.
  top_event <- state_rows$EVTYPE[state_rows$injury == state_max][1]
  injury_top[i, ] <- c(
    as.character(injury_states[i]),
    as.character(top_event),
    as.character(state_max)
  )
}

injuries <- as.data.frame(injury_top)
names(injuries) <- c("STATE", "EVENT", "Injuries")
# Property damage by state and event type ----
# Keep only the records with a non-missing F value. This overwrites `storm`,
# so everything below works on the reduced data set.
# NOTE(review): in the results printed for this analysis every top event is
# TORNADO, so this filter appears to leave only tornado records -- confirm
# that restricting the damage analysis this way is intended.
storm <- storm[!is.na(storm$F), ]

# Total PROPDMG for every (STATE, EVTYPE) pair.
# NOTE(review): PROPDMG is summed as-is and PROPDMGEXP is never used. If that
# column is a magnitude code for PROPDMG, these totals mix units -- confirm.
damage <- summarise(
  group_by(storm, STATE, EVTYPE),
  damage = sum(PROPDMG)
)

# One row per state: state code, the event type with the highest damage
# total, and that total. Everything is stored as text because the matrix also
# holds the state and event labels.
damage_states <- unique(damage$STATE)
damage_top <- matrix(nrow = length(damage_states), ncol = 3)

# Lay the per-state bar charts out on a 3 x 6 grid with tight margins.
par(mfrow = c(3, 6))
par(mar = c(4, 4, 1, 1))

# seq_along() instead of 1:length(): with no states the loop body is skipped
# rather than run for i = 1 and i = 0.
for (i in seq_along(damage_states)) {
  state_rows <- damage[damage$STATE == damage_states[i], ]
  barplot(
    state_rows$damage,
    names.arg = state_rows$EVTYPE,
    main = damage_states[i]
  )

  state_max <- max(state_rows$damage)
  # On ties, the first event type that reaches the maximum is reported.
  top_event <- state_rows$EVTYPE[state_rows$damage == state_max][1]
  damage_top[i, ] <- c(
    as.character(damage_states[i]),
    as.character(top_event),
    as.character(state_max)
  )
}

damages <- as.data.frame(damage_top)
names(damages) <- c("STATE", "EVENT", "Damages")
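The table below lists, for each state across the United States, the severe weather event type that caused the most injuries, together with that injury total. Only injuries are counted here; fatalities are not included in the totals.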
# Show the per-state table of the event type with the most injuries.
print(injuries)
## STATE EVENT Injuries
## 1 AK ICE STORM 34
## 2 AL TORNADO 7929
## 3 AM MARINE THUNDERSTORM WIND 22
## 4 AN MARINE STRONG WIND 18
## 5 AR TORNADO 5116
## 6 AS TSUNAMI 129
## 7 AZ DUST STORM 179
## 8 CA WILDFIRE 623
## 9 CO TORNADO 261
## 10 CT TORNADO 703
## 11 DC EXCESSIVE HEAT 316
## 12 DE TORNADO 73
## 13 FL TORNADO 3340
## 14 GA TORNADO 3926
## 15 GM MARINE HAIL 0
## 16 GU HURRICANE/TYPHOON 333
## 17 HI STRONG WIND 20
## 18 IA TORNADO 2208
## 19 ID THUNDERSTORM WIND 74
## 20 IL TORNADO 4145
## 21 IN TORNADO 4224
## 22 KS TORNADO 2721
## 23 KY TORNADO 2806
## 24 LA TORNADO 2637
## 25 LC MARINE HAIL 0
## 26 LE MARINE HAIL 0
## 27 LH MARINE HAIL 0
## 28 LM MARINE STRONG WIND 1
## 29 LO MARINE HAIL 0
## 30 LS MARINE HAIL 0
## 31 MA TORNADO 1758
## 32 MD EXCESSIVE HEAT 461
## 33 ME LIGHTNING 70
## 34 MH HIGH SURF 1
## 35 MI TORNADO 3362
## 36 MN TORNADO 1976
## 37 MO TORNADO 4330
## 38 MS TORNADO 6244
## 39 MT WILD/FOREST FIRE 33
## 40 NC TORNADO 2536
## 41 ND TORNADO 326
## 42 NE TORNADO 1158
## 43 NH LIGHTNING 85
## 44 NJ EXCESSIVE HEAT 300
## 45 NM TORNADO 155
## 46 NV FLOOD 50
## 47 NY TORNADO 315
## 48 OH TORNADO 4438
## 49 OK TORNADO 4829
## 50 OR HIGH WIND 50
## 51 PA TORNADO 1241
## 52 PH MARINE STRONG WIND 0
## 53 PK MARINE HIGH WIND 0
## 54 PM WATERSPOUT 0
## 55 PR HEAVY RAIN 10
## 56 PZ MARINE STRONG WIND 3
## 57 RI TORNADO 23
## 58 SC TORNADO 1314
## 59 SD TORNADO 452
## 60 SL MARINE HAIL 0
## 61 ST STRONG WINDS 0
## 62 TN TORNADO 4748
## 63 TX TORNADO 8207
## 64 UT WINTER STORM 415
## 65 VA TORNADO 914
## 66 VI LIGHTNING 1
## 67 VT TSTM WIND 24
## 68 WA TORNADO 303
## 69 WI TORNADO 1601
## 70 WV TSTM WIND 142
## 71 WY WINTER STORM 119
## 72 XX MARINE THUNDERSTORM WIND 0
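The table below lists, for each state of the USA, the severe weather event type with the largest total property damage, together with that total. Because the data were first restricted to records with a recorded F value, only tornado events appear in this table.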
# Show the per-state table of the event type with the largest property damage.
print(damages)
## STATE EVENT Damages
## 1 AK TORNADO 0
## 2 AL TORNADO 167225.95
## 3 AR TORNADO 119262.74
## 4 AZ TORNADO 6708.66
## 5 CA TORNADO 15333.14
## 6 CO TORNADO 18199.8
## 7 CT TORNADO 4618.79
## 8 DC TORNADO 2
## 9 DE TORNADO 3370.55
## 10 FL TORNADO 157807.15
## 11 GA TORNADO 150892.96
## 12 HI TORNADO 817.75
## 13 IA TORNADO 148393.65
## 14 ID TORNADO 4792.47
## 15 IL TORNADO 127935.28
## 16 IN TORNADO 104293.43
## 17 KS TORNADO 141626.52
## 18 KY TORNADO 73844.42
## 19 LA TORNADO 131475.83
## 20 MA TORNADO 7113.57
## 21 MD TORNADO 20033.95
## 22 ME TORNADO 4495.25
## 23 MI TORNADO 70841.6
## 24 MN TORNADO 74030.08
## 25 MO TORNADO 130951.37
## 26 MS TORNADO 186375.63
## 27 MT TORNADO 7730.34
## 28 NC TORNADO 95930.19
## 29 ND TORNADO 48398.86
## 30 NE TORNADO 105968.45
## 31 NH TORNADO 5211.25
## 32 NJ TORNADO 14001.95
## 33 NM TORNADO 8576.33
## 34 NV TORNADO 1312.81
## 35 NY TORNADO 37095.64
## 36 OH TORNADO 95597.09
## 37 OK TORNADO 164764.26
## 38 OR TORNADO 2187.58
## 39 PA TORNADO 53097.5
## 40 PR TORNADO 470
## 41 RI TORNADO 1047.5
## 42 SC TORNADO 56625.1
## 43 SD TORNADO 31339.12
## 44 TN TORNADO 112160.96
## 45 TX TORNADO 280353.41
## 46 UT TORNADO 2927.42
## 47 VA TORNADO 48464.52
## 48 VT TORNADO 2949.75
## 49 WA TORNADO 3989.78
## 50 WI TORNADO 110820.07
## 51 WV TORNADO 9231.12
## 52 WY TORNADO 6795.93