Data analysis of the NOAA Storm Database. Storms and other severe weather events can cause both public health problems and economic damage. This assignment therefore explores the NOAA Storm Database to answer some basic questions about severe weather events. The events in the database start in the year 1950 and end in November 2011. The dataset includes variables such as FATALITIES and INJURIES, which describe the adverse effects on the human population caused by each event type (EVTYPE), and the variables PROPDMG and CROPDMG, which record the damage to property and crops.
The purpose of this assignment is: - to learn how to generate an RMarkdown document - to understand the importance of reproducibility - to analyse the data and produce answers to the given questions
Loading the given dataset.
# Echo the source of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
# The workspace is deliberately NOT cleared here: rm(list = ls()) would delete
# the caller's objects whenever the report is rendered from a live session.
# Show the working directory; the data file below is read relative to it.
getwd()
## [1] "/Users/soni/Desktop/DATA_SCIENCE specialization/Reproducible Research/WEEK 4"
# Read the storm CSV from the working directory, keeping text columns as
# character vectors instead of factors, then preview the first rows.
data_df <- read.csv(
  file = "repdata%2Fdata%2FStormData.csv",
  stringsAsFactors = FALSE
)
head(data_df)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
# Per-column summary statistics of the raw data.
summary(object = data_df)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:902297 Length:902297 Length:902297
## 1st Qu.:19.0 Class :character Class :character Class :character
## Median :30.0 Mode :character Mode :character Mode :character
## Mean :31.2
## 3rd Qu.:45.0
## Max. :95.0
##
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0.0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 31.0 Class :character Class :character Class :character
## Median : 75.0 Mode :character Mode :character Mode :character
## Mean :100.6
## 3rd Qu.:131.0
## Max. :873.0
##
## BGN_RANGE BGN_AZI BGN_LOCATI
## Min. : 0.000 Length:902297 Length:902297
## 1st Qu.: 0.000 Class :character Class :character
## Median : 0.000 Mode :character Mode :character
## Mean : 1.484
## 3rd Qu.: 1.000
## Max. :3749.000
##
## END_DATE END_TIME COUNTY_END COUNTYENDN
## Length:902297 Length:902297 Min. :0 Mode:logical
## Class :character Class :character 1st Qu.:0 NA's:902297
## Mode :character Mode :character Median :0
## Mean :0
## 3rd Qu.:0
## Max. :0
##
## END_RANGE END_AZI END_LOCATI
## Min. : 0.0000 Length:902297 Length:902297
## 1st Qu.: 0.0000 Class :character Class :character
## Median : 0.0000 Mode :character Mode :character
## Mean : 0.9862
## 3rd Qu.: 0.0000
## Max. :925.0000
##
## LENGTH WIDTH F MAG
## Min. : 0.0000 Min. : 0.000 Min. :0.0 Min. : 0.0
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.:0.0 1st Qu.: 0.0
## Median : 0.0000 Median : 0.000 Median :1.0 Median : 50.0
## Mean : 0.2301 Mean : 7.503 Mean :0.9 Mean : 46.9
## 3rd Qu.: 0.0000 3rd Qu.: 0.000 3rd Qu.:1.0 3rd Qu.: 75.0
## Max. :2315.0000 Max. :4400.000 Max. :5.0 Max. :22000.0
## NA's :843563
## FATALITIES INJURIES PROPDMG
## Min. : 0.0000 Min. : 0.0000 Min. : 0.00
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.00
## Median : 0.0000 Median : 0.0000 Median : 0.00
## Mean : 0.0168 Mean : 0.1557 Mean : 12.06
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.50
## Max. :583.0000 Max. :1700.0000 Max. :5000.00
##
## PROPDMGEXP CROPDMG CROPDMGEXP
## Length:902297 Min. : 0.000 Length:902297
## Class :character 1st Qu.: 0.000 Class :character
## Mode :character Median : 0.000 Mode :character
## Mean : 1.527
## 3rd Qu.: 0.000
## Max. :990.000
##
## WFO STATEOFFIC ZONENAMES LATITUDE
## Length:902297 Length:902297 Length:902297 Min. : 0
## Class :character Class :character Class :character 1st Qu.:2802
## Mode :character Mode :character Mode :character Median :3540
## Mean :2875
## 3rd Qu.:4019
## Max. :9706
## NA's :47
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## Min. :-14451 Min. : 0 Min. :-14455 Length:902297
## 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 8707 Median : 0 Median : 0 Mode :character
## Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. : 17124 Max. :9706 Max. :106220
## NA's :40
## REFNUM
## Min. : 1
## 1st Qu.:225575
## Median :451149
## Mean :451149
## 3rd Qu.:676723
## Max. :902297
##
# Compact listing of every column's type and leading values.
str(object = data_df)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# How many records report each fatality count.
table(data_df[["FATALITIES"]])
##
## 0 1 2 3 4 5 6 7 8 9
## 895323 5010 996 314 166 114 71 53 33 30
## 10 11 12 13 14 15 16 17 18 19
## 30 24 12 13 12 5 11 7 2 2
## 20 21 22 23 24 25 26 27 29 30
## 7 3 6 3 4 5 1 3 3 3
## 31 32 33 34 36 37 38 42 44 46
## 3 3 3 1 1 1 1 3 1 1
## 49 50 57 67 74 75 90 99 114 116
## 1 1 2 1 1 1 1 1 1 1
## 158 583
## 1 1
# How many records report each injury count.
table(data_df[["INJURIES"]])
##
## 0 1 2 3 4 5 6 7 8 9
## 884693 7756 3134 1552 931 709 529 280 255 186
## 10 11 12 13 14 15 16 17 18 19
## 271 109 181 84 84 138 51 57 52 30
## 20 21 22 23 24 25 26 27 28 29
## 130 28 40 26 37 65 31 23 25 16
## 30 31 32 33 34 35 36 37 38 39
## 67 12 19 13 10 24 12 13 10 8
## 40 41 42 43 44 45 46 47 48 49
## 48 8 12 7 6 14 7 10 8 7
## 50 51 52 53 54 55 56 57 58 59
## 58 6 5 9 4 12 4 7 6 9
## 60 61 62 63 64 65 66 67 68 69
## 17 4 4 3 4 10 1 3 2 2
## 70 71 72 73 74 75 76 77 78 79
## 13 2 4 2 1 14 2 2 3 1
## 80 81 82 83 85 87 88 89 90 91
## 11 2 1 1 2 1 3 2 6 1
## 92 93 94 95 96 97 98 100 101 102
## 1 2 1 2 1 3 1 34 1 2
## 103 104 105 106 108 109 110 111 112 115
## 2 1 1 1 1 1 3 1 3 2
## 116 118 119 120 121 122 123 125 129 130
## 2 1 1 3 2 2 1 2 2 4
## 135 136 137 138 140 142 143 144 145 150
## 2 1 3 1 3 1 1 1 1 12
## 152 153 154 156 159 160 165 166 170 172
## 1 2 1 1 1 1 2 1 2 1
## 175 176 177 180 181 185 190 192 195 200
## 4 1 2 3 1 2 1 2 1 20
## 207 210 215 216 223 224 225 230 234 240
## 1 1 1 1 1 1 2 1 1 1
## 241 246 250 252 257 258 266 270 275 280
## 1 1 3 2 2 1 1 3 1 2
## 293 300 306 316 325 342 350 385 397 410
## 1 5 1 1 1 1 4 1 1 1
## 411 437 450 463 500 504 519 550 560 597
## 1 1 3 1 7 1 1 1 1 1
## 600 700 750 780 785 800 1150 1228 1568 1700
## 1 1 1 1 1 2 2 1 1 1
Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? - Calculation of injuries and fatalities. - Summing up all the injuries and fatalities according to the event type.
knitr::opts_chunk$set(echo = TRUE)
# Calculation of injuries and fatalities.
# Total the fatalities and injuries recorded for each event type.
injury_subset <- aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE,
                           data = data_df, FUN = sum, na.rm = TRUE)
# Keep every event type that caused at least one fatality OR one injury.
# Intersecting the "fatalities > 0" and "injuries > 0" subsets (an inner join)
# would drop event types that harmed people in only one of the two ways.
has_casualties <- injury_subset$FATALITIES > 0 | injury_subset$INJURIES > 0
injury_subset_df <- injury_subset[has_casualties, ]
# Rank by injuries, breaking ties by fatalities, highest first.
casualty_rank <- order(injury_subset_df$INJURIES, injury_subset_df$FATALITIES,
                       decreasing = TRUE)
# head() returns at most ten rows; indexing with 1:10 would pad the result
# with NA rows whenever fewer than ten event types are available.
injury_subset_df_order <- head(injury_subset_df[casualty_rank, ], 10)
print(injury_subset_df_order)
## EVTYPE FATALITIES INJURIES
## 85 TORNADO 5633 91346
## 88 TSTM WIND 504 6957
## 22 FLOOD 470 6789
## 13 EXCESSIVE HEAT 1903 6525
## 58 LIGHTNING 816 5230
## 34 HEAT 937 2100
## 53 ICE STORM 89 1975
## 20 FLASH FLOOD 978 1777
## 83 THUNDERSTORM WIND 133 1488
## 33 HAIL 15 1361
Plot showing the fatalities and injuries across different event types: the fatalities and injuries for the ten event types that occurred with the most casualties.
???????????????
knitr::opts_chunk$set(echo = TRUE)
# Grouped bar chart of fatalities and injuries for the top ten event types.
events_column <- injury_subset_df_order$EVTYPE
# barplot() draws one series per matrix row, so transpose the count columns
# (everything except the first column, EVTYPE).
casualty_counts <- t(injury_subset_df_order[, -1])
# One colour per series, shared by the bars and the legend.
series_colours <- c("light blue", "pink")
# Derive the y-axis limit from the data (with 5% headroom) so the tallest bar
# is never clipped if the totals change.
y_limit <- c(0, max(casualty_counts) * 1.05)
barplot(casualty_counts, names.arg = events_column, ylim = y_limit,
        beside = TRUE, cex.names = 0.8, las = 2, col = series_colours,
        main = "Top 10 Disaster Casualties")
legend("topright", c("Fatalities", "Injuries"), fill = series_colours,
       bty = "n")
Checking the data in the PROPDMGEXP and CROPDMGEXP variables. Harmful events causing the greatest economic consequences. QUESTION 2: Across the United States, which types of events have the greatest economic consequences?
knitr::opts_chunk$set(echo = TRUE)
# Frequency of each magnitude code recorded for crop damage.
table(data_df[["CROPDMGEXP"]])
##
## 0 2 ? B K M k m
## 618413 19 1 7 9 281832 1994 21 1
knitr::opts_chunk$set(echo = TRUE)
# Convert PROPDMG to plain numeric amounts using the magnitude code stored in
# PROPDMGEXP (H = hundreds, K = thousands, M = millions, B = billions).
# Codes are matched case-insensitively so "h"/"H", "k"/"K", "m"/"M" and
# "b"/"B" are all scaled the same way; any other code leaves the value as is.
# NOTE: PROPDMG is overwritten in place, so run this chunk only once per load.
prop_exp_codes <- c("H", "K", "M", "B")
prop_exp_values <- c(1e2, 1e3, 1e6, 1e9)
prop_code_pos <- match(toupper(data_df$PROPDMGEXP), prop_exp_codes)
prop_scale <- prop_exp_values[prop_code_pos]
# Unrecognised or missing codes get a multiplier of 1 (value unchanged).
prop_scale[is.na(prop_scale)] <- 1
data_df$PROPDMG <- data_df$PROPDMG * prop_scale
head(data_df[, c("EVTYPE", "PROPDMG", "PROPDMGEXP")])
## EVTYPE PROPDMG PROPDMGEXP
## 1 TORNADO 25000 K
## 2 TORNADO 2500 K
## 3 TORNADO 25000 K
## 4 TORNADO 2500 K
## 5 TORNADO 2500 K
## 6 TORNADO 2500 K
# Sum the property damage for every event type, sort the totals from largest
# to smallest, and keep the ten costliest event types.
damage_prop <- aggregate(PROPDMG ~ EVTYPE, data = data_df, FUN = sum,
                         na.rm = TRUE)
damage_prop_order <- damage_prop[order(-damage_prop$PROPDMG), ]
damage_prop_order1 <- head(damage_prop_order, n = 10)
print(damage_prop_order1)
## EVTYPE PROPDMG
## 167 FLOOD 144657709807
## 393 HURRICANE/TYPHOON 69305840000
## 826 TORNADO 56937160779
## 656 STORM SURGE 43323536000
## 151 FLASH FLOOD 16140812067
## 241 HAIL 15732267543
## 385 HURRICANE 11868319010
## 839 TROPICAL STORM 7703890550
## 962 WINTER STORM 6688497251
## 343 HIGH WIND 5270046295
knitr::opts_chunk$set(echo = TRUE)
# Convert CROPDMG to plain numeric amounts using the magnitude code stored in
# CROPDMGEXP (H = hundreds, K = thousands, M = millions, B = billions).
# Codes are matched case-insensitively: the data contains both "k" and "m"
# in lower case, and each must be scaled like its upper-case form. Any other
# code leaves the value as is.
# NOTE: CROPDMG is overwritten in place, so run this chunk only once per load.
crop_exp_codes <- c("H", "K", "M", "B")
crop_exp_values <- c(1e2, 1e3, 1e6, 1e9)
crop_code_pos <- match(toupper(data_df$CROPDMGEXP), crop_exp_codes)
crop_scale <- crop_exp_values[crop_code_pos]
# Unrecognised or missing codes get a multiplier of 1 (value unchanged).
crop_scale[is.na(crop_scale)] <- 1
data_df$CROPDMG <- data_df$CROPDMG * crop_scale
head(data_df[, c("EVTYPE", "CROPDMG", "CROPDMGEXP")])
## EVTYPE CROPDMG CROPDMGEXP
## 1 TORNADO 0
## 2 TORNADO 0
## 3 TORNADO 0
## 4 TORNADO 0
## 5 TORNADO 0
## 6 TORNADO 0
# Sum the crop damage for every event type, sort the totals from largest to
# smallest, and keep the ten costliest event types.
damage_crop <- aggregate(CROPDMG ~ EVTYPE, data = data_df, FUN = sum,
                         na.rm = TRUE)
damage_crop_order <- damage_crop[order(-damage_crop$CROPDMG), ]
damage_crop_order1 <- head(damage_crop_order, n = 10)
print(damage_crop_order1)
## EVTYPE CROPDMG
## 91 DROUGHT 13972566000
## 167 FLOOD 5661968450
## 577 RIVER FLOOD 5029459000
## 422 ICE STORM 5022113500
## 241 HAIL 3025954473
## 385 HURRICANE 2741910000
## 393 HURRICANE/TYPHOON 2607872800
## 151 FLASH FLOOD 1421317100
## 132 EXTREME COLD 1292973000
## 198 FROST/FREEZE 1094086000
Plot showing the property damage across different event types
knitr::opts_chunk$set(echo = TRUE)
# Plot the ten event types with the largest total property damage.
# This chunk draws a single plot, so no multi-panel layout is requested: with
# mfrow = c(1, 2) the second panel would stay empty and halve the bar chart.
# Margins: a deep bottom margin for the rotated event names, and a left margin
# wide enough for the axis title, which is drawn on margin line 3 by default.
prop_old_par <- par(mar = c(11, 5, 3, 2))
barplot(damage_prop_order1$PROPDMG / 1e9,
        names.arg = damage_prop_order1$EVTYPE, las = 2, col = "purple",
        ylab = "Prop.damage(billions)", main = "Events Vs Top10 Prop.Damages")
# Restore the previous graphics settings.
par(prop_old_par)
Plot showing the crop damage across different event types
knitr::opts_chunk$set(echo = TRUE)
# Plot the ten event types with the largest total crop damage.
# Set the margins in this chunk: by default knitr does not carry par()
# settings over from an earlier chunk, so the rotated event names would
# otherwise get only the default bottom margin.
crop_old_par <- par(mar = c(11, 5, 3, 2))
barplot(damage_crop_order1$CROPDMG / 1e9,
        names.arg = damage_crop_order1$EVTYPE, las = 2, col = "RED",
        ylab = "Crop damage(billions)", main = "Events Vs Top10 Crop.Damages")
# Restore the previous graphics settings.
par(crop_old_par)