First, the raw data obtained from the website is processed: the variables that are not of use in our analysis are removed and the data is cleaned further. Next, I focus on the effects of storms on health: the data is grouped by event type, the event types are converted to a factor, and the totals are plotted. The same steps are then carried out for the economic effects of storms.
# Load the raw storm data set from the compressed CSV file.
stormdt <- read.csv(file = "repdata_data_StormData.csv.bz2")
# List the available column names.
colnames(stormdt)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Inspect the structure of the raw data: number of rows/columns and the type
# of every column.
str(stormdt)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the columns needed for the health analysis: state, event type and
# the fatality / injury counts.
strmdtn_hlth <- select(stormdt, STATE, EVTYPE, FATALITIES, INJURIES)
# Preview the first six rows of the health subset.
head(strmdtn_hlth, n = 6L)
## STATE EVTYPE FATALITIES INJURIES
## 1 AL TORNADO 0 15
## 2 AL TORNADO 0 0
## 3 AL TORNADO 0 2
## 4 AL TORNADO 0 2
## 5 AL TORNADO 0 2
## 6 AL TORNADO 0 6
# Keep only the columns needed for the economic analysis: state, event type and
# the property / crop damage figures with their EXP columns.
strmdtn_eco <- select(
  stormdt,
  STATE, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
# Preview the first six rows of the economic subset.
head(strmdtn_eco, n = 6L)
## STATE EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 AL TORNADO 25.0 K 0
## 2 AL TORNADO 2.5 K 0
## 3 AL TORNADO 25.0 K 0
## 4 AL TORNADO 2.5 K 0
## 5 AL TORNADO 2.5 K 0
## 6 AL TORNADO 2.5 K 0
# Check the column types of the health subset before aggregating.
str(strmdtn_hlth)
## 'data.frame': 902297 obs. of 4 variables:
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
# Total injuries and fatalities per event type (one row per distinct EVTYPE).
hlth_eve <- strmdtn_hlth %>%
  group_by(EVTYPE) %>%
  summarise(TOTINJ = sum(INJURIES), TOTFAT = sum(FATALITIES))
## `summarise()` ungrouping output (override with `.groups` argument)
# Preview the aggregated health totals. The first rows are EVTYPE labels that
# begin with whitespace in the raw data, which is why they sort to the top.
head(hlth_eve)
## # A tibble: 6 x 3
## EVTYPE TOTINJ TOTFAT
## <chr> <dbl> <dbl>
## 1 " HIGH SURF ADVISORY" 0 0
## 2 " COASTAL FLOOD" 0 0
## 3 " FLASH FLOOD" 0 0
## 4 " LIGHTNING" 0 0
## 5 " TSTM WIND" 0 0
## 6 " TSTM WIND (G45)" 0 0
# Turn the event type into a factor so it is treated as a categorical variable.
hlth_eve <- hlth_eve %>%
  mutate(EVTYPE = factor(EVTYPE))
# Confirm the conversion and the number of distinct event types.
str(hlth_eve)
## tibble [985 x 3] (S3: tbl_df/tbl/data.frame)
## $ EVTYPE: Factor w/ 985 levels " HIGH SURF ADVISORY",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ TOTINJ: num [1:985] 0 0 0 0 0 0 0 0 0 0 ...
## $ TOTFAT: num [1:985] 0 0 0 0 0 0 0 0 0 0 ...
library(ggplot2)
# Total fatalities per event type. The y-axis is labelled as fatalities so that
# it matches the plotted variable (TOTFAT).
qplot(
  EVTYPE, TOTFAT,
  data = hlth_eve,
  xlab = "EVENT TYPES",
  ylab = "TOTAL FATALITIES",
  main = "TOTAL FATALITIES OCCURED DUE TO STORMS"
)
# Total injuries per event type.
qplot(
  EVTYPE, TOTINJ,
  data = hlth_eve,
  xlab = "EVENT TYPES",
  ylab = "TOTAL INJURIES",
  main = " TOTAL INJURIES OCCURED DUE TO STORMS"
)
# Event type with the highest total number of injuries.
hlth_eve %>%
  slice(which.max(TOTINJ))
## # A tibble: 1 x 3
## EVTYPE TOTINJ TOTFAT
## <fct> <dbl> <dbl>
## 1 TORNADO 91346 5633
# Event type with the highest total number of fatalities.
hlth_eve %>%
  slice(which.max(TOTFAT))
## # A tibble: 1 x 3
## EVTYPE TOTINJ TOTFAT
## <fct> <dbl> <dbl>
## 1 TORNADO 91346 5633
# Check the column types of the economic subset before aggregating.
str(strmdtn_eco)
## 'data.frame': 902297 obs. of 6 variables:
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
# Sum of the raw PROPDMG values per event type.
# NOTE(review): PROPDMGEXP (values such as "K" appear in the data) and the
# CROPDMG columns are not used here. Presumably PROPDMGEXP is a magnitude code
# for PROPDMG; if so, these totals mix units -- confirm before ranking events.
eco_eve <- strmdtn_eco %>%
  group_by(EVTYPE) %>%
  summarise(TOTPROP = sum(PROPDMG))
## `summarise()` ungrouping output (override with `.groups` argument)
# Preview the aggregated property-damage totals per event type.
head(eco_eve)
## # A tibble: 6 x 2
## EVTYPE TOTPROP
## <chr> <dbl>
## 1 " HIGH SURF ADVISORY" 200
## 2 " COASTAL FLOOD" 0
## 3 " FLASH FLOOD" 50
## 4 " LIGHTNING" 0
## 5 " TSTM WIND" 108
## 6 " TSTM WIND (G45)" 8
# Treat the event type as a categorical variable for plotting.
eco_eve <- eco_eve %>%
  mutate(EVTYPE = factor(EVTYPE))
# Summed PROPDMG per event type.
qplot(x = EVTYPE, y = TOTPROP, data = eco_eve)
# Event type with the largest summed PROPDMG.
eco_eve %>%
  slice(which.max(TOTPROP))
## # A tibble: 1 x 2
## EVTYPE TOTPROP
## <fct> <dbl>
## 1 TORNADO 3212258.
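1. From the results above we can see that both the health of the population and the economy of the US have been affected most by TORNADO events. Note that the economic total is the sum of the raw PROPDMG values only; the PROPDMGEXP column and the crop-damage columns are not used in it.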