The aim of this data analysis is to determine the economic and human harm caused by weather phenomena across the USA, using the NOAA dataset. The data was published on August 17, 2007. Two research questions will be answered: which event type is most detrimental in terms of human harm, and which event type causes the most economic harm. Human harm is reflected by the number of fatalities and injuries, whereas economic harm is determined by property and crop damage.

Library declaration

Data processing

Downloading the data from its source

# Fetch the compressed NOAA storm data only when no local copy exists, so
# re-running the analysis does not download the archive again.
# mode = "wb" requests a binary transfer so the bzip2 archive is stored
# byte-for-byte regardless of platform.
if (!file.exists("storm_data")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "storm_data",
    mode = "wb"
  )
}

Reading the data into the R work environment and exploring the data-structure with summary()

# Read the downloaded file through a bzip2 connection and parse it as CSV.
storm_data <- read.csv(file = bzfile(description = "storm_data"))

# Print per-column summary statistics for a first look at the data structure.
summary(object = storm_data)
##     STATE__       BGN_DATE           BGN_TIME          TIME_ZONE        
##  Min.   : 1.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.:19.0   Class :character   Class :character   Class :character  
##  Median :30.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :31.2                                                           
##  3rd Qu.:45.0                                                           
##  Max.   :95.0                                                           
##                                                                         
##      COUNTY       COUNTYNAME           STATE              EVTYPE         
##  Min.   :  0.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.: 31.0   Class :character   Class :character   Class :character  
##  Median : 75.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100.6                                                           
##  3rd Qu.:131.0                                                           
##  Max.   :873.0                                                           
##                                                                          
##    BGN_RANGE          BGN_AZI           BGN_LOCATI          END_DATE        
##  Min.   :   0.000   Length:902297      Length:902297      Length:902297     
##  1st Qu.:   0.000   Class :character   Class :character   Class :character  
##  Median :   0.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :   1.484                                                           
##  3rd Qu.:   1.000                                                           
##  Max.   :3749.000                                                           
##                                                                             
##    END_TIME           COUNTY_END COUNTYENDN       END_RANGE       
##  Length:902297      Min.   :0    Mode:logical   Min.   :  0.0000  
##  Class :character   1st Qu.:0    NA's:902297    1st Qu.:  0.0000  
##  Mode  :character   Median :0                   Median :  0.0000  
##                     Mean   :0                   Mean   :  0.9862  
##                     3rd Qu.:0                   3rd Qu.:  0.0000  
##                     Max.   :0                   Max.   :925.0000  
##                                                                   
##    END_AZI           END_LOCATI            LENGTH              WIDTH         
##  Length:902297      Length:902297      Min.   :   0.0000   Min.   :   0.000  
##  Class :character   Class :character   1st Qu.:   0.0000   1st Qu.:   0.000  
##  Mode  :character   Mode  :character   Median :   0.0000   Median :   0.000  
##                                        Mean   :   0.2301   Mean   :   7.503  
##                                        3rd Qu.:   0.0000   3rd Qu.:   0.000  
##                                        Max.   :2315.0000   Max.   :4400.000  
##                                                                              
##        F               MAG            FATALITIES          INJURIES        
##  Min.   :0.0      Min.   :    0.0   Min.   :  0.0000   Min.   :   0.0000  
##  1st Qu.:0.0      1st Qu.:    0.0   1st Qu.:  0.0000   1st Qu.:   0.0000  
##  Median :1.0      Median :   50.0   Median :  0.0000   Median :   0.0000  
##  Mean   :0.9      Mean   :   46.9   Mean   :  0.0168   Mean   :   0.1557  
##  3rd Qu.:1.0      3rd Qu.:   75.0   3rd Qu.:  0.0000   3rd Qu.:   0.0000  
##  Max.   :5.0      Max.   :22000.0   Max.   :583.0000   Max.   :1700.0000  
##  NA's   :843563                                                           
##     PROPDMG         PROPDMGEXP           CROPDMG         CROPDMGEXP       
##  Min.   :   0.00   Length:902297      Min.   :  0.000   Length:902297     
##  1st Qu.:   0.00   Class :character   1st Qu.:  0.000   Class :character  
##  Median :   0.00   Mode  :character   Median :  0.000   Mode  :character  
##  Mean   :  12.06                      Mean   :  1.527                     
##  3rd Qu.:   0.50                      3rd Qu.:  0.000                     
##  Max.   :5000.00                      Max.   :990.000                     
##                                                                           
##      WFO             STATEOFFIC         ZONENAMES            LATITUDE   
##  Length:902297      Length:902297      Length:902297      Min.   :   0  
##  Class :character   Class :character   Class :character   1st Qu.:2802  
##  Mode  :character   Mode  :character   Mode  :character   Median :3540  
##                                                           Mean   :2875  
##                                                           3rd Qu.:4019  
##                                                           Max.   :9706  
##                                                           NA's   :47    
##    LONGITUDE        LATITUDE_E     LONGITUDE_       REMARKS         
##  Min.   :-14451   Min.   :   0   Min.   :-14455   Length:902297     
##  1st Qu.:  7247   1st Qu.:   0   1st Qu.:     0   Class :character  
##  Median :  8707   Median :   0   Median :     0   Mode  :character  
##  Mean   :  6940   Mean   :1452   Mean   :  3509                     
##  3rd Qu.:  9605   3rd Qu.:3549   3rd Qu.:  8735                     
##  Max.   : 17124   Max.   :9706   Max.   :106220                     
##                   NA's   :40                                        
##      REFNUM      
##  Min.   :     1  
##  1st Qu.:225575  
##  Median :451149  
##  Mean   :451149  
##  3rd Qu.:676723  
##  Max.   :902297  
## 

Results

The sum of injuries and fatalities is used as a metric of human harm, and the data set is sorted accordingly. The ten most harmful events are displayed in tabular format. Graphically, the 30 most harmful weather events are displayed by number of fatalities.

# Aggregate injuries and fatalities per event type and rank the event types by
# their combined total, highest first.
# NOTE(review): the column name "Injurizes" looks like a misspelling of
# "Injuries"; it is kept unchanged so the rendered table header stays the same.
Human_Harm <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Injurizes = sum(INJURIES),
    Fatalities = sum(FATALITIES),
    Total_harm = Injurizes + Fatalities
  ) %>%
  arrange(desc(Total_harm))

# Show the ten event types with the highest combined total as a table.
knitr::kable(Human_Harm[seq_len(10), ])
EVTYPE Injurizes Fatalities Total_harm
TORNADO 91346 5633 96979
EXCESSIVE HEAT 6525 1903 8428
TSTM WIND 6957 504 7461
FLOOD 6789 470 7259
LIGHTNING 5230 816 6046
HEAT 2100 937 3037
FLASH FLOOD 1777 978 2755
ICE STORM 1975 89 2064
THUNDERSTORM WIND 1488 133 1621
WINTER STORM 1321 206 1527

Depicting the human harm (in fatalities) from the thirty most devastating weather events in the USA

# Bar chart of fatalities for the first 30 rows of Human_Harm, with the bars
# ordered from the highest to the lowest fatality count.
ggplot(
  data = Human_Harm[1:30, ],
  mapping = aes(x = fct_reorder(EVTYPE, desc(Fatalities)), y = Fatalities)
) +
  geom_col(mapping = aes(fill = Fatalities)) +
  labs(x = "Event type") +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

# Economic damage: bring all property and crop values to the same order of
# magnitude. The *DMGEXP columns hold a magnitude code (K, M or B) that is
# exchanged for its numerical value so it can be multiplied with the matching
# *DMG amount.

# Replace the K/M/B magnitude codes in `code` by their numerical value.
# Matching is case-insensitive, so "k" is expanded just like "K"; previously
# lowercase codes were coerced to NA and silently dropped from the totals.
# Every other code is returned untouched.
expand_magnitude <- function(code) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  hit <- match(toupper(code), names(multipliers))
  matched <- !is.na(hit)
  # Assigning into the character column stores the numbers as text; they are
  # converted back with as.numeric() when the totals are computed below.
  code[matched] <- multipliers[hit[matched]]
  code
}

storm_data$CROPDMGEXP <- expand_magnitude(storm_data$CROPDMGEXP)
storm_data$PROPDMGEXP <- expand_magnitude(storm_data$PROPDMGEXP)

# Total crop, property and combined damage per event type. Rows whose magnitude
# code cannot be read as a number become NA (with a coercion warning) and are
# left out of the sums by na.rm = TRUE.
Economic_Harm <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_crop_damage = sum(
      as.numeric(CROPDMG) * as.numeric(CROPDMGEXP),
      na.rm = TRUE
    ),
    total_prop_damage = sum(
      as.numeric(PROPDMG) * as.numeric(PROPDMGEXP),
      na.rm = TRUE
    ),
    total_damage = total_crop_damage + total_prop_damage
  )
## Warning: There were 18 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `total_crop_damage = sum(as.numeric(CROPDMG) *
##   as.numeric(CROPDMGEXP), na.rm = T)`.
## ℹ In group 155: `EVTYPE = "FLASH FLOOD WINDS"`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 17 remaining warnings.
# Rank the event types from the largest to the smallest combined damage.
Economic_Harm <- Economic_Harm %>%
  arrange(desc(total_damage))

# Show the ten costliest event types as a table; amounts are depicted in USD.
knitr::kable(Economic_Harm[seq_len(10), ])
EVTYPE total_crop_damage total_prop_damage total_damage
FLOOD 5661968450 144657709800 150319678250
HURRICANE/TYPHOON 2607872800 69305840000 71913712800
TORNADO 414953110 56925660991 57340614101
STORM SURGE 5000 43323536000 43323541000
HAIL 3025537450 15727366870 18752904320
FLASH FLOOD 1421317100 16140812087 17562129187
DROUGHT 13972566000 1046106000 15018672000
HURRICANE 2741910000 11868319010 14610229010
RIVER FLOOD 5029459000 5118945500 10148404500
ICE STORM 5022113500 3944927810 8967041310

Depicting the total economic loss (in USD) for the thirty most devastating weather events in the USA

# Bar chart of the combined damage for the first 30 rows of Economic_Harm, with
# the bars ordered from the highest to the lowest total.
ggplot(
  data = Economic_Harm[1:30, ],
  mapping = aes(x = fct_reorder(EVTYPE, desc(total_damage)), y = total_damage)
) +
  geom_col(mapping = aes(fill = total_damage)) +
  labs(
    x = "Event type",
    y = "economic harm in USD",
    fill = "Total damage in USD"
  ) +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))