##Sypnosis
Storms and other extreme weather conditions can harm the public’s health as well as towns’ economies. Preventing such consequences to the greatest extent feasible is a major issue because many extreme occurrences have the potential to cause lives, injuries, and property damage.
In this report, we want to respond to two issues-related queries. First, we consider the occurrences that are most detrimental to population health in the United States. Second, we look at which events around the country have the most economic impacts.
The U.S. National Oceanic and Atmospheric Administration (NOAA) storm database, which keeps track of the characteristics of significant storms and meteorological occurrences in the country, was utilized to conduct the investigation.
The findings in this report include the most harmful weather events with respect to public health and economic damage using the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. ## Load and process the data
USstorms = read.csv("repdata_data_StormData.csv",header=TRUE, stringsAsFactors = FALSE)
cat("this data has ",nrow(USstorms), "observations and ", ncol(USstorms), "variables")
## this data has 902297 observations and 37 variables
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.2
Create variables to demonstrate destruction in cash
in.cash = function(dmg,exp){
if (exp=="H" | exp=="h"){
return(dmg*100)
} else if ( exp=="K" | exp=="k"){
return(dmg*1000)
} else if ( exp=="M" | exp =="m"){
return (dmg*1e+6)
}else if (exp == "B" | exp=="b"){
return (dmg=1e+8)
}
else if (exp==""){
return(0)
}
else {return (NA)}
}
USstorms$PROPCASH= sapply(1:nrow(USstorms), function(i) in.cash(USstorms$PROPDMG[i],
USstorms$PROPDMGEXP[i]))
USstorms$CROPCASH= sapply(1:nrow(USstorms), function(i) in.cash(USstorms$CROPDMG[i],
USstorms$CROPDMGEXP[i]))
USstorms$TOTALDMG=USstorms$PROPCASH + USstorms$CROPCASH
Extract year and month from the data
USstorms$month=format(strptime(USstorms$BGN_DATE,"%m/%d/%Y %H:%M:%S"),"%b")
USstorms$year=format(strptime(USstorms$BGN_DATE,"%m/%d/%Y %H:%M:%S"),"%Y")
Take a look at the processed data from the variables of interest
head(USstorms[,c("EVTYPE", "COUNTYNAME", "FATALITIES", "INJURIES", "PROPCASH",
"CROPCASH","month","year","TOTALDMG")])
## EVTYPE COUNTYNAME FATALITIES INJURIES PROPCASH CROPCASH month year TOTALDMG
## 1 TORNADO MOBILE 0 15 25000 0 apr 1950 25000
## 2 TORNADO BALDWIN 0 0 2500 0 apr 1950 2500
## 3 TORNADO FAYETTE 0 2 25000 0 feb 1951 25000
## 4 TORNADO MADISON 0 2 2500 0 jun 1951 2500
## 5 TORNADO CULLMAN 0 2 2500 0 nov 1951 2500
## 6 TORNADO LAUDERDALE 0 6 2500 0 nov 1951 2500
events.by.type= USstorms %>% group_by(EVTYPE) %>% summarise(total.events=n()) %>% arrange(desc(total.events))
events.by.damage=USstorms %>% group_by(EVTYPE) %>% summarise(total.dmg=TOTALDMG) %>% arrange(desc(total.dmg))
## `summarise()` has grouped output by 'EVTYPE'. You can override using the
## `.groups` argument.
head(events.by.type, 10)
## # A tibble: 10 × 2
## EVTYPE total.events
## <chr> <int>
## 1 HAIL 288661
## 2 TSTM WIND 219940
## 3 THUNDERSTORM WIND 82563
## 4 TORNADO 60652
## 5 FLASH FLOOD 54277
## 6 FLOOD 25326
## 7 THUNDERSTORM WINDS 20843
## 8 HIGH WIND 20212
## 9 LIGHTNING 15754
## 10 HEAVY SNOW 15708
head(events.by.damage, 10)
## # A tibble: 10 × 2
## # Groups: EVTYPE [7]
## EVTYPE total.dmg
## <chr> <dbl>
## 1 HIGH WIND 1104000000
## 2 FLOOD 950000000
## 3 HAIL 900000000
## 4 HURRICANE 824220000
## 5 HURRICANE 792150000
## 6 FLOOD 750000000
## 7 THUNDERSTORM WIND 750000000
## 8 WINTER STORM 750000000
## 9 HURRICANE/TYPHOON 711000000
## 10 HIGH WIND 702000000
Let’s plot the pattern of top ten frequent events in a plot
ten.events.by.year=USstorms %>% filter(EVTYPE %in% c("HAIL", "TSTM WIND", "THUNDERSTORM WIND",
"TORNADO", "FLASH FLOOD", "FLOOD", "THUNDERSTORM WINDS", "HIGH WIND", "HEAT",
"EXCESSIVE HEAT")) %>% group_by(year,EVTYPE) %>% summarise(total=n())
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
g1=ggplot(ten.events.by.year,aes(x=as.numeric(year),y=total,group=1))+geom_line()+facet_wrap(~EVTYPE,ncol=2,nrow=5) + labs(title = "Number of occurences of weather events in the United States",
x = "Year", y = "Number of occurences (logarytmic scale)") +
scale_y_log10() + scale_x_continuous(breaks=seq(min(ten.events.by.year$year), max(ten.events.by.year$year), by=10))
g1
### Let check what type of weather events are destructive to the
population health
pop.health=USstorms %>% group_by(EVTYPE) %>% summarise(total.events=n(), total.injuries=sum(INJURIES), total.death=sum(FATALITIES)) %>% arrange(desc(total.death))
head(pop.health)
## # A tibble: 6 × 4
## EVTYPE total.events total.injuries total.death
## <chr> <int> <dbl> <dbl>
## 1 TORNADO 60652 91346 5633
## 2 EXCESSIVE HEAT 1678 6525 1903
## 3 FLASH FLOOD 54277 1777 978
## 4 HEAT 767 2100 937
## 5 LIGHTNING 15754 5230 816
## 6 TSTM WIND 219940 6957 504
As it is shown: - Tornado seems to be the deadliest events followed by the Excessive heat events - HEAT events only occurred 767 times but these events caused 937 deaths, ranked number 4th in the list
Now let check the pattern of these 6 events during the years spanning from 1950 to 2010
pop.health.by.year= USstorms %>% filter(EVTYPE %in% head(pop.health$EVTYPE)) %>% group_by(year,EVTYPE) %>% summarise(total.death=sum(FATALITIES), total.injuries= sum(INJURIES))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
g3= pop.health.by.year %>% gather(key,value, total.death, total.injuries) %>%
ggplot(aes(x=as.numeric(year), y=value, colour=key, group=1)) +
geom_line() + facet_wrap(~EVTYPE, ncol=2, nrow=3) + scale_y_log10()+ scale_x_continuous(breaks=seq(min(ten.events.by.year$year), max(ten.events.by.year$year), by=10)) + labs(title = "Sum of injuries and fatilities for selected weather event types \nin the United States",
x= "Year" , y = "Number of occurences (logarytmic scale)")
g3
## Warning: Transformation introduced infinite values in continuous y-axis
The plot depicted that among those harsh weather events, Tornado and
lightning caused deaths and injuries on a regular basis throughout the
year.
And have another look into which counties experienced the highest fatalities in the last 20 years (1990-2010)
fatalities.by.state = USstorms %>% select(STATE,EVTYPE,FATALITIES,INJURIES,year) %>% filter(year>1990) %>% group_by(STATE) %>% summarise(total.injuries=sum(INJURIES), total.fatalities = sum(FATALITIES)) %>% arrange(desc(total.fatalities))
head(fatalities.by.state,10)
## # A tibble: 10 × 3
## STATE total.injuries total.fatalities
## <chr> <dbl> <dbl>
## 1 IL 1822 1230
## 2 TX 10254 871
## 3 PA 2151 767
## 4 FL 3524 671
## 5 MO 6801 602
## 6 CA 3197 550
## 7 AL 4445 517
## 8 TN 2778 352
## 9 NY 1103 316
## 10 NC 1694 306
Let’s find out the top 10 extreme weather events to the economy loss
economy.loss = USstorms %>% select(EVTYPE,CROPCASH,PROPCASH,TOTALDMG) %>% group_by(EVTYPE) %>% summarise(total.crop.loss=sum(CROPCASH),total.prop.loss=sum(PROPCASH), total.loss = sum(TOTALDMG)) %>% arrange(desc(total.loss))
head(economy.loss,10)
## # A tibble: 10 × 4
## EVTYPE total.crop.loss total.prop.loss total.loss
## <chr> <dbl> <dbl> <dbl>
## 1 HURRICANE 2741910000 6468319010 9210229010
## 2 HURRICANE/TYPHOON 1197872800 5005840000 6203712800
## 3 WILDFIRE 295472800 3825114000 4120586800
## 4 TROPICAL STORM 678346000 2653890550 3332236550
## 5 WILD/FOREST FIRE 106796830 1601829500 1708626330
## 6 HEAVY RAIN 733399800 694248090 1427647890
## 7 EXTREME COLD 1292973000 67737400 1360710400
## 8 FROST/FREEZE 1094086000 9480000 1103566000
## 9 STORM SURGE 5000 963536000 963541000
## 10 BLIZZARD 112060000 659213950 771273950
Now let’s plot the pattern of these 10 events that cause the most economical damage.
economy.loss.by.year = USstorms %>% filter(EVTYPE %in% c("HURRICANE","HURRICANE/TYPHOON","WILDFIRE","TROPICAL STORM","WILD/FOREST FIRE", "HEAVY RAIN","EXTREME COLD", "FROST/FREEZE", "STORM SURGE","BLIZZARD")) %>% group_by(year,EVTYPE) %>% summarise(total.crop.loss=sum(CROPCASH),total.prop.loss=sum(PROPCASH))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
g4=economy.loss.by.year %>% gather(key,value, total.crop.loss, total.prop.loss) %>% ggplot(aes(x=as.numeric(year),y=value, colour=key, group=1)) + geom_line()+labs(title = "Sum of crop and properties loss (in cash) for selected weather event types \nin the United States",
x= "Year" , y = "Cash loss (logarytmic scale))") + scale_y_log10() + scale_x_continuous(breaks=seq(min(economy.loss.by.year$year),max(economy.loss.by.year$year),by=10)) + facet_wrap(~EVTYPE, ncol=2, nrow=5)
g4
## Warning: Transformation introduced infinite values in continuous y-axis
As it can be seen from the plot, Tropical storms and Heavy Rains seem to
cause the regular loss on a yearly basis.
And have another look into which counties experienced the highest economy loss in the last 20 years (1990-2010)
economy.loss.by.state = USstorms %>% filter(year>1990) %>% group_by(STATE) %>% summarise(total.crop.loss=sum(CROPCASH),total.prop.loss=sum(PROPCASH),total.loss=sum(TOTALDMG)) %>% arrange(desc(total.loss))
head(economy.loss.by.state,10)
## # A tibble: 10 × 4
## STATE total.crop.loss total.prop.loss total.loss
## <chr> <dbl> <dbl> <dbl>
## 1 LA 1228706000 4922845750 6151551750
## 2 AL 306769240 4610775050 4917544290
## 3 MN 325023300 4407296470 4732319770
## 4 NY 221263600 4458566460 4679830060
## 5 MS 299807600 3825334380 4125141980
## 6 AR 174790010 3456625330 3631415340
## 7 NJ 99265000 3139667450 3238932450
## 8 ND 549730000 2353639650 2903369650
## 9 KY 294892700 2418610910 2713503610
## 10 AZ 214161000 2009016100 2223177100