SYNOPSIS

Data analysis of the NOAA Storm Database. The database covers storms and other severe weather events that affect public health and cause economic damage. This assignment aims to explore the NOAA Storm Database and answer some basic questions about severe weather events. The events in the database start in the year 1950 and end in November 2011. The dataset contains variables such as FATALITIES and INJURIES, which describe the adverse effects on the human population caused by each event type (EVTYPE), and the variables PROPDMG and CROPDMG, which record the damage to property and crops.

PURPOSE

The purpose of this assignment is: - to learn how to generate an R Markdown document - to understand the importance of reproducibility - to analyse the data and produce answers to the given questions

DATA PROCESSING

Loading the given dataset.

knitr::opts_chunk$set(echo = TRUE)

# NOTE: the former `rm(list = ls())` was dropped. It only deletes the caller's
# global objects (it does not detach packages or reset options), so it gives
# no reproducibility benefit and destroys work when run interactively.

# Show the working directory the relative CSV path below is resolved against.
getwd()
## [1] "/Users/soni/Desktop/DATA_SCIENCE specialization/Reproducible Research/WEEK 4"
# Read the storm data, keeping text columns as character vectors (no factors).
data_df <- read.csv("repdata%2Fdata%2FStormData.csv", stringsAsFactors = FALSE)
# Preview the first six rows.
head(data_df)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6
# Per-column summary of the loaded data frame.
summary(data_df)
##     STATE__       BGN_DATE           BGN_TIME          TIME_ZONE        
##  Min.   : 1.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.:19.0   Class :character   Class :character   Class :character  
##  Median :30.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :31.2                                                           
##  3rd Qu.:45.0                                                           
##  Max.   :95.0                                                           
##                                                                         
##      COUNTY       COUNTYNAME           STATE              EVTYPE         
##  Min.   :  0.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.: 31.0   Class :character   Class :character   Class :character  
##  Median : 75.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100.6                                                           
##  3rd Qu.:131.0                                                           
##  Max.   :873.0                                                           
##                                                                          
##    BGN_RANGE          BGN_AZI           BGN_LOCATI       
##  Min.   :   0.000   Length:902297      Length:902297     
##  1st Qu.:   0.000   Class :character   Class :character  
##  Median :   0.000   Mode  :character   Mode  :character  
##  Mean   :   1.484                                        
##  3rd Qu.:   1.000                                        
##  Max.   :3749.000                                        
##                                                          
##    END_DATE           END_TIME           COUNTY_END COUNTYENDN    
##  Length:902297      Length:902297      Min.   :0    Mode:logical  
##  Class :character   Class :character   1st Qu.:0    NA's:902297   
##  Mode  :character   Mode  :character   Median :0                  
##                                        Mean   :0                  
##                                        3rd Qu.:0                  
##                                        Max.   :0                  
##                                                                   
##    END_RANGE          END_AZI           END_LOCATI       
##  Min.   :  0.0000   Length:902297      Length:902297     
##  1st Qu.:  0.0000   Class :character   Class :character  
##  Median :  0.0000   Mode  :character   Mode  :character  
##  Mean   :  0.9862                                        
##  3rd Qu.:  0.0000                                        
##  Max.   :925.0000                                        
##                                                          
##      LENGTH              WIDTH                F               MAG         
##  Min.   :   0.0000   Min.   :   0.000   Min.   :0.0      Min.   :    0.0  
##  1st Qu.:   0.0000   1st Qu.:   0.000   1st Qu.:0.0      1st Qu.:    0.0  
##  Median :   0.0000   Median :   0.000   Median :1.0      Median :   50.0  
##  Mean   :   0.2301   Mean   :   7.503   Mean   :0.9      Mean   :   46.9  
##  3rd Qu.:   0.0000   3rd Qu.:   0.000   3rd Qu.:1.0      3rd Qu.:   75.0  
##  Max.   :2315.0000   Max.   :4400.000   Max.   :5.0      Max.   :22000.0  
##                                         NA's   :843563                    
##    FATALITIES          INJURIES            PROPDMG       
##  Min.   :  0.0000   Min.   :   0.0000   Min.   :   0.00  
##  1st Qu.:  0.0000   1st Qu.:   0.0000   1st Qu.:   0.00  
##  Median :  0.0000   Median :   0.0000   Median :   0.00  
##  Mean   :  0.0168   Mean   :   0.1557   Mean   :  12.06  
##  3rd Qu.:  0.0000   3rd Qu.:   0.0000   3rd Qu.:   0.50  
##  Max.   :583.0000   Max.   :1700.0000   Max.   :5000.00  
##                                                          
##   PROPDMGEXP           CROPDMG         CROPDMGEXP       
##  Length:902297      Min.   :  0.000   Length:902297     
##  Class :character   1st Qu.:  0.000   Class :character  
##  Mode  :character   Median :  0.000   Mode  :character  
##                     Mean   :  1.527                     
##                     3rd Qu.:  0.000                     
##                     Max.   :990.000                     
##                                                         
##      WFO             STATEOFFIC         ZONENAMES            LATITUDE   
##  Length:902297      Length:902297      Length:902297      Min.   :   0  
##  Class :character   Class :character   Class :character   1st Qu.:2802  
##  Mode  :character   Mode  :character   Mode  :character   Median :3540  
##                                                           Mean   :2875  
##                                                           3rd Qu.:4019  
##                                                           Max.   :9706  
##                                                           NA's   :47    
##    LONGITUDE        LATITUDE_E     LONGITUDE_       REMARKS         
##  Min.   :-14451   Min.   :   0   Min.   :-14455   Length:902297     
##  1st Qu.:  7247   1st Qu.:   0   1st Qu.:     0   Class :character  
##  Median :  8707   Median :   0   Median :     0   Mode  :character  
##  Mean   :  6940   Mean   :1452   Mean   :  3509                     
##  3rd Qu.:  9605   3rd Qu.:3549   3rd Qu.:  8735                     
##  Max.   : 17124   Max.   :9706   Max.   :106220                     
##                   NA's   :40                                        
##      REFNUM      
##  Min.   :     1  
##  1st Qu.:225575  
##  Median :451149  
##  Mean   :451149  
##  3rd Qu.:676723  
##  Max.   :902297  
## 
# Compact overview of the data frame: dimensions, column names and types.
str(data_df)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Frequency of each distinct FATALITIES value across all records.
table(data_df$FATALITIES)
## 
##      0      1      2      3      4      5      6      7      8      9 
## 895323   5010    996    314    166    114     71     53     33     30 
##     10     11     12     13     14     15     16     17     18     19 
##     30     24     12     13     12      5     11      7      2      2 
##     20     21     22     23     24     25     26     27     29     30 
##      7      3      6      3      4      5      1      3      3      3 
##     31     32     33     34     36     37     38     42     44     46 
##      3      3      3      1      1      1      1      3      1      1 
##     49     50     57     67     74     75     90     99    114    116 
##      1      1      2      1      1      1      1      1      1      1 
##    158    583 
##      1      1
# Frequency of each distinct INJURIES value across all records.
table(data_df$INJURIES)
## 
##      0      1      2      3      4      5      6      7      8      9 
## 884693   7756   3134   1552    931    709    529    280    255    186 
##     10     11     12     13     14     15     16     17     18     19 
##    271    109    181     84     84    138     51     57     52     30 
##     20     21     22     23     24     25     26     27     28     29 
##    130     28     40     26     37     65     31     23     25     16 
##     30     31     32     33     34     35     36     37     38     39 
##     67     12     19     13     10     24     12     13     10      8 
##     40     41     42     43     44     45     46     47     48     49 
##     48      8     12      7      6     14      7     10      8      7 
##     50     51     52     53     54     55     56     57     58     59 
##     58      6      5      9      4     12      4      7      6      9 
##     60     61     62     63     64     65     66     67     68     69 
##     17      4      4      3      4     10      1      3      2      2 
##     70     71     72     73     74     75     76     77     78     79 
##     13      2      4      2      1     14      2      2      3      1 
##     80     81     82     83     85     87     88     89     90     91 
##     11      2      1      1      2      1      3      2      6      1 
##     92     93     94     95     96     97     98    100    101    102 
##      1      2      1      2      1      3      1     34      1      2 
##    103    104    105    106    108    109    110    111    112    115 
##      2      1      1      1      1      1      3      1      3      2 
##    116    118    119    120    121    122    123    125    129    130 
##      2      1      1      3      2      2      1      2      2      4 
##    135    136    137    138    140    142    143    144    145    150 
##      2      1      3      1      3      1      1      1      1     12 
##    152    153    154    156    159    160    165    166    170    172 
##      1      2      1      1      1      1      2      1      2      1 
##    175    176    177    180    181    185    190    192    195    200 
##      4      1      2      3      1      2      1      2      1     20 
##    207    210    215    216    223    224    225    230    234    240 
##      1      1      1      1      1      1      2      1      1      1 
##    241    246    250    252    257    258    266    270    275    280 
##      1      1      3      2      2      1      1      3      1      2 
##    293    300    306    316    325    342    350    385    397    410 
##      1      5      1      1      1      1      4      1      1      1 
##    411    437    450    463    500    504    519    550    560    597 
##      1      1      3      1      7      1      1      1      1      1 
##    600    700    750    780    785    800   1150   1228   1568   1700 
##      1      1      1      1      1      2      2      1      1      1

DATA PROCESSING

Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? - Calculation of injuries and fatalities. - Summing up all the injuries and fatalities according to the event type.

knitr::opts_chunk$set(echo = TRUE)
# Calculation of injuries and fatalities:
# total FATALITIES and INJURIES for every event type (EVTYPE).
injury_subset <- aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE,
                           data = data_df, FUN = sum, na.rm = TRUE)

# Event types with at least one fatality, and with at least one injury.
injury_subset1 <- injury_subset[injury_subset$FATALITIES > 0, ]
injury_subset2 <- injury_subset[injury_subset$INJURIES > 0, ]

# Inner join on all three shared columns (the same key merge() picks by
# default, now spelled out). Both inputs are row subsets of `injury_subset`,
# so the result is their intersection: event types that caused BOTH
# fatalities and injuries.
injury_subset_df <- merge(injury_subset1, injury_subset2,
                          by = c("EVTYPE", "FATALITIES", "INJURIES"))

# Rank by injuries (ties broken by fatalities), largest first.
injury_subset_df_order <- injury_subset_df[
  order(injury_subset_df$INJURIES, injury_subset_df$FATALITIES,
        decreasing = TRUE),
]
# head() instead of `[1:10, ]`: never pads the result with all-NA rows when
# fewer than ten event types qualify.
injury_subset_df_order <- head(injury_subset_df_order, 10)
print(injury_subset_df_order)
##               EVTYPE FATALITIES INJURIES
## 85           TORNADO       5633    91346
## 88         TSTM WIND        504     6957
## 22             FLOOD        470     6789
## 13    EXCESSIVE HEAT       1903     6525
## 58         LIGHTNING        816     5230
## 34              HEAT        937     2100
## 53         ICE STORM         89     1975
## 20       FLASH FLOOD        978     1777
## 83 THUNDERSTORM WIND        133     1488
## 33              HAIL         15     1361

RESULTS

Plot showing the fatalities and injuries across the ten event types with the most casualties.

???????????????

knitr::opts_chunk$set(echo = TRUE)

events_column <- injury_subset_df_order$EVTYPE

# Transpose the two count columns (everything except EVTYPE) into a
# 2 x n matrix so barplot() draws one group of bars per event type.
casualty_matrix <- t(injury_subset_df_order[, -1])
# One colour vector shared by the bars and the legend so they cannot drift.
casualty_cols <- c("light blue", "pink")
# Keep the original 92000 ceiling, but grow it (rounded up to the next 1000)
# if the data ever exceeds it, so the tallest bar is never clipped.
casualty_ymax <- max(92000, ceiling(max(casualty_matrix, na.rm = TRUE) / 1000) * 1000)

# `beside = TRUE` (not `T`, which is an ordinary reassignable variable).
barplot(casualty_matrix, names.arg = events_column,
        ylim = c(0, casualty_ymax), beside = TRUE, cex.names = 0.8, las = 2,
        col = casualty_cols, main = "Top 10 Disaster Casualties")
legend("topright", c("Fatalities", "Injuries"), fill = casualty_cols, bty = "n")

DATA PROCESSING

Checking the data in the PROPDMGEXP and CROPDMGEXP variables. Harmful events causing the greatest economic consequences. QUESTION 2: Across the United States, which types of events have the greatest economic consequences?

knitr::opts_chunk$set(echo = TRUE)
# Tabulate the distinct exponent codes stored in CROPDMGEXP and how often
# each occurs, to see which codes the conversion below has to handle.
table(data_df$CROPDMGEXP)
## 
##             0      2      ?      B      K      M      k      m 
## 618413     19      1      7      9 281832   1994     21      1
knitr::opts_chunk$set(echo = TRUE)
# Convert the PROPDMGEXP exponent codes into numeric multipliers and apply
# them to PROPDMG, so that PROPDMG holds the damage in plain dollars.
#
# The lookup is case-insensitive: the previous code scaled "m" but would have
# silently left any lowercase "k", "h" or "b" unscaled. Codes without an entry
# (blank, digits, "?", ...) keep a multiplier of 1, i.e. PROPDMG is left
# unchanged for them, exactly as before.
#
# NOTE: PROPDMG is overwritten in place; run this chunk only once per load.
prop_exp_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
prop_scale <- unname(prop_exp_multiplier[toupper(data_df$PROPDMGEXP)])
prop_scale[is.na(prop_scale)] <- 1
data_df$PROPDMG <- data_df$PROPDMG * prop_scale
head(data_df[, c("EVTYPE", "PROPDMG", "PROPDMGEXP")])
##    EVTYPE PROPDMG PROPDMGEXP
## 1 TORNADO   25000          K
## 2 TORNADO    2500          K
## 3 TORNADO   25000          K
## 4 TORNADO    2500          K
## 5 TORNADO    2500          K
## 6 TORNADO    2500          K
# Total property damage per event type, sorted from largest to smallest,
# then the ten costliest event types are kept and printed.
damage_prop <- aggregate(PROPDMG ~ EVTYPE, data = data_df, FUN = sum, na.rm = TRUE)
damage_prop_order <- damage_prop[order(-damage_prop$PROPDMG), ]
damage_prop_order1 <- head(damage_prop_order, n = 10)
print(damage_prop_order1)
##                EVTYPE      PROPDMG
## 167             FLOOD 144657709807
## 393 HURRICANE/TYPHOON  69305840000
## 826           TORNADO  56937160779
## 656       STORM SURGE  43323536000
## 151       FLASH FLOOD  16140812067
## 241              HAIL  15732267543
## 385         HURRICANE  11868319010
## 839    TROPICAL STORM   7703890550
## 962      WINTER STORM   6688497251
## 343         HIGH WIND   5270046295
knitr::opts_chunk$set(echo = TRUE)
# Convert the CROPDMGEXP exponent codes into numeric multipliers and apply
# them to CROPDMG, so that CROPDMG holds the damage in plain dollars.
#
# The lookup is case-insensitive. The previous code handled "K", "M", "B" and
# "k" but not lowercase "m", although the CROPDMGEXP table shows such a
# record; that damage was counted as if it were not in millions. Codes
# without an entry (blank, digits, "?") keep a multiplier of 1, as before.
#
# NOTE: CROPDMG is overwritten in place; run this chunk only once per load.
crop_exp_multiplier <- c(K = 1e3, M = 1e6, B = 1e9)
crop_scale <- unname(crop_exp_multiplier[toupper(data_df$CROPDMGEXP)])
crop_scale[is.na(crop_scale)] <- 1
data_df$CROPDMG <- data_df$CROPDMG * crop_scale

head(data_df[, c("EVTYPE", "CROPDMG", "CROPDMGEXP")])
##    EVTYPE CROPDMG CROPDMGEXP
## 1 TORNADO       0           
## 2 TORNADO       0           
## 3 TORNADO       0           
## 4 TORNADO       0           
## 5 TORNADO       0           
## 6 TORNADO       0
# Total crop damage per event type, sorted from largest to smallest,
# then the ten costliest event types are kept and printed.
damage_crop <- aggregate(CROPDMG ~ EVTYPE, data = data_df, FUN = sum, na.rm = TRUE)
damage_crop_order <- damage_crop[order(-damage_crop$CROPDMG), ]
damage_crop_order1 <- head(damage_crop_order, n = 10)
print(damage_crop_order1)
##                EVTYPE     CROPDMG
## 91            DROUGHT 13972566000
## 167             FLOOD  5661968450
## 577       RIVER FLOOD  5029459000
## 422         ICE STORM  5022113500
## 241              HAIL  3025954473
## 385         HURRICANE  2741910000
## 393 HURRICANE/TYPHOON  2607872800
## 151       FLASH FLOOD  1421317100
## 132      EXTREME COLD  1292973000
## 198      FROST/FREEZE  1094086000

RESULTS

Plot showing the property damage across different event types

knitr::opts_chunk$set(echo = TRUE)


# Plotting.
# Margins (bottom, left, top, right) in lines of text:
#  - bottom = 11 leaves room for the rotated (las = 2) event-type labels;
#  - left = 5 (was 3) leaves room for the y-axis title, which is drawn three
#    lines out from the axis and was cut off by a three-line margin.
# The former `mfrow = c(1, 2)` was dropped: this chunk draws a single plot,
# so a two-panel layout left half of the figure blank.
par(mar = c(11, 5, 3, 2))
# Bar chart of the ten event types with the highest property damage,
# expressed in billions of dollars.
barplot(damage_prop_order1$PROPDMG / (10^9),
        names.arg = damage_prop_order1$EVTYPE, las = 2, col = "purple",
        ylab = "Prop.damage(billions)", main = "Events Vs Top10 Prop.Damages")

RESULTS

Plot showing the crop damage across different event types

knitr::opts_chunk$set(echo = TRUE)
# Set the margins in this chunk as well: knitr does not carry par() settings
# over from earlier chunks by default, so without this the rotated (las = 2)
# event-type labels do not fit in the default bottom margin.
# Order is (bottom, left, top, right), in lines of text.
par(mar = c(11, 5, 3, 2))
# Bar chart of the ten event types with the highest crop damage,
# expressed in billions of dollars.
barplot(damage_crop_order1$CROPDMG / (10^9),
        names.arg = damage_crop_order1$EVTYPE, las = 2, col = "RED",
        ylab = "Crop damage(billions)", main = "Events Vs Top10 Crop.Damages")