The aim of this data analysis is to determine the economic and human harm caused by weather phenomena across the USA, using the NOAA storm dataset. The data was published on August 17, 2007. Two research questions will be answered: which event type is most harmful to people, and which event type causes the most economic harm. Human harm is measured by the number of fatalities and injuries, whereas economic harm is measured by property and crop damage.
# Fetch the bzip2-compressed NOAA storm data and load it into `storm_data`.
# The download is skipped when a local copy named 'storm_data' already exists,
# so re-running the analysis does not fetch the archive again.
if (!file.exists('storm_data')) {
  # mode = 'wb' requests a binary transfer; a text-mode transfer can corrupt
  # a compressed file on Windows.
  download.file(
    'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2',
    'storm_data',
    mode = 'wb'
  )
}
# bzfile() decompresses on the fly while read.csv() parses the content.
storm_data <- read.csv(bzfile('storm_data'))
# Per-column overview of the loaded data.
summary(storm_data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:902297 Length:902297 Length:902297
## 1st Qu.:19.0 Class :character Class :character Class :character
## Median :30.0 Mode :character Mode :character Mode :character
## Mean :31.2
## 3rd Qu.:45.0
## Max. :95.0
##
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0.0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 31.0 Class :character Class :character Class :character
## Median : 75.0 Mode :character Mode :character Mode :character
## Mean :100.6
## 3rd Qu.:131.0
## Max. :873.0
##
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## Min. : 0.000 Length:902297 Length:902297 Length:902297
## 1st Qu.: 0.000 Class :character Class :character Class :character
## Median : 0.000 Mode :character Mode :character Mode :character
## Mean : 1.484
## 3rd Qu.: 1.000
## Max. :3749.000
##
## END_TIME COUNTY_END COUNTYENDN END_RANGE
## Length:902297 Min. :0 Mode:logical Min. : 0.0000
## Class :character 1st Qu.:0 NA's:902297 1st Qu.: 0.0000
## Mode :character Median :0 Median : 0.0000
## Mean :0 Mean : 0.9862
## 3rd Qu.:0 3rd Qu.: 0.0000
## Max. :0 Max. :925.0000
##
## END_AZI END_LOCATI LENGTH WIDTH
## Length:902297 Length:902297 Min. : 0.0000 Min. : 0.000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.000
## Mode :character Mode :character Median : 0.0000 Median : 0.000
## Mean : 0.2301 Mean : 7.503
## 3rd Qu.: 0.0000 3rd Qu.: 0.000
## Max. :2315.0000 Max. :4400.000
##
## F MAG FATALITIES INJURIES
## Min. :0.0 Min. : 0.0 Min. : 0.0000 Min. : 0.0000
## 1st Qu.:0.0 1st Qu.: 0.0 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median :1.0 Median : 50.0 Median : 0.0000 Median : 0.0000
## Mean :0.9 Mean : 46.9 Mean : 0.0168 Mean : 0.1557
## 3rd Qu.:1.0 3rd Qu.: 75.0 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :5.0 Max. :22000.0 Max. :583.0000 Max. :1700.0000
## NA's :843563
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 Length:902297 Min. : 0.000 Length:902297
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.000 Class :character
## Median : 0.00 Mode :character Median : 0.000 Mode :character
## Mean : 12.06 Mean : 1.527
## 3rd Qu.: 0.50 3rd Qu.: 0.000
## Max. :5000.00 Max. :990.000
##
## WFO STATEOFFIC ZONENAMES LATITUDE
## Length:902297 Length:902297 Length:902297 Min. : 0
## Class :character Class :character Class :character 1st Qu.:2802
## Mode :character Mode :character Mode :character Median :3540
## Mean :2875
## 3rd Qu.:4019
## Max. :9706
## NA's :47
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## Min. :-14451 Min. : 0 Min. :-14455 Length:902297
## 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 8707 Median : 0 Median : 0 Mode :character
## Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. : 17124 Max. :9706 Max. :106220
## NA's :40
## REFNUM
## Min. : 1
## 1st Qu.:225575
## Median :451149
## Mean :451149
## 3rd Qu.:676723
## Max. :902297
##
The sum of injuries and fatalities is used as the metric of human harm, and the data set is sorted by it. The ten most harmful event types are displayed in tabular format. Graphically, the 30 most harmful event types (by total harm) are plotted, ordered by their number of fatalities.
# Per event type: total injuries, total fatalities and their combined count,
# ranked from the largest to the smallest combined count.
Human_Harm <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Injurizes = sum(INJURIES),
    Fatalities = sum(FATALITIES),
    Total_harm = sum(INJURIES, FATALITIES)
  ) %>%
  arrange(desc(Total_harm))
# Render the ten highest-ranked event types as a table.
knitr::kable(Human_Harm[1:10, ])
EVTYPE | Injurizes | Fatalities | Total_harm |
---|---|---|---|
TORNADO | 91346 | 5633 | 96979 |
EXCESSIVE HEAT | 6525 | 1903 | 8428 |
TSTM WIND | 6957 | 504 | 7461 |
FLOOD | 6789 | 470 | 7259 |
LIGHTNING | 5230 | 816 | 6046 |
HEAT | 2100 | 937 | 3037 |
FLASH FLOOD | 1777 | 978 | 2755 |
ICE STORM | 1975 | 89 | 2064 |
THUNDERSTORM WIND | 1488 | 133 | 1621 |
WINTER STORM | 1321 | 206 | 1527 |
# Bar chart of fatalities for the first 30 rows of Human_Harm; bars are
# ordered by descending fatality count and shaded by the same value.
Human_Harm[1:30, ] %>%
  ggplot(aes(x = fct_reorder(EVTYPE, desc(Fatalities)), y = Fatalities)) +
  geom_col(aes(fill = Fatalities)) +
  # Rotate the event-type labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  labs(x = 'Event type')
# Economic damage: bring all property and crop damage values onto the same
# scale by turning the magnitude codes in CROPDMGEXP / PROPDMGEXP into numeric
# multipliers (K = thousand, M = million, B = billion).
#
# Matching is case-insensitive, so 'k', 'm' and 'b' are no longer lost as NA,
# and the result is a numeric column rather than numbers stored as text.

# Translate a vector of magnitude codes into numeric multipliers.
#   code: character vector of magnitude codes (a numeric vector is returned
#         unchanged, which makes re-running this chunk harmless).
#   returns: numeric vector of the same length as `code`.
exponent_multiplier <- function(code) {
  if (is.numeric(code)) {
    return(code)
  }
  lookup <- c(K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(trimws(as.character(code)))
  multiplier <- unname(lookup[code])
  # Codes other than K/M/B (digits, '+', '-', '?', ...) have no meaning that
  # can be established here; they contribute nothing instead of being used
  # as literal factors.
  multiplier[is.na(multiplier)] <- 0
  # NOTE(review): a blank code is assumed to mean "no scaling", i.e. the
  # amount is already in USD - confirm against the NOAA documentation.
  multiplier[is.na(code) | code == ''] <- 1
  multiplier
}

storm_data$CROPDMGEXP <- exponent_multiplier(storm_data$CROPDMGEXP)
############################################################################
storm_data$PROPDMGEXP <- exponent_multiplier(storm_data$PROPDMGEXP)
# Per event type: total crop damage, total property damage and their sum.
# Each damage amount is scaled by its magnitude column before summing.
Economic_Harm <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    # CROPDMG / PROPDMG are already numeric; only the magnitude columns are
    # coerced. Products that come out as NA are left out of the totals.
    total_crop_damage = sum(CROPDMG * as.numeric(CROPDMGEXP), na.rm = TRUE),
    total_prop_damage = sum(PROPDMG * as.numeric(PROPDMGEXP), na.rm = TRUE),
    total_damage = total_crop_damage + total_prop_damage
  )
## Warning: There were 18 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `total_crop_damage = sum(as.numeric(CROPDMG) *
## as.numeric(CROPDMGEXP), na.rm = T)`.
## ℹ In group 155: `EVTYPE = "FLASH FLOOD WINDS"`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 17 remaining warnings.
# Rank event types from the highest to the lowest total damage and render
# the first ten rows as a table.
Economic_Harm <- Economic_Harm %>%
  arrange(desc(total_damage))
knitr::kable(Economic_Harm[1:10, ]) # depicted in USD
EVTYPE | total_crop_damage | total_prop_damage | total_damage |
---|---|---|---|
FLOOD | 5661968450 | 144657709800 | 150319678250 |
HURRICANE/TYPHOON | 2607872800 | 69305840000 | 71913712800 |
TORNADO | 414953110 | 56925660991 | 57340614101 |
STORM SURGE | 5000 | 43323536000 | 43323541000 |
HAIL | 3025537450 | 15727366870 | 18752904320 |
FLASH FLOOD | 1421317100 | 16140812087 | 17562129187 |
DROUGHT | 13972566000 | 1046106000 | 15018672000 |
HURRICANE | 2741910000 | 11868319010 | 14610229010 |
RIVER FLOOD | 5029459000 | 5118945500 | 10148404500 |
ICE STORM | 5022113500 | 3944927810 | 8967041310 |
# Bar chart of total damage for the first 30 rows of Economic_Harm; bars are
# ordered by descending total damage and shaded by the same value.
Economic_Harm[1:30, ] %>%
  ggplot(aes(x = fct_reorder(EVTYPE, desc(total_damage)), y = total_damage)) +
  geom_col(aes(fill = total_damage)) +
  # Rotate the event-type labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  labs(
    x = 'Event type',
    y = 'economic harm in USD',
    fill = 'Total damage in USD'
  )