Title: “Exploring which weather elements have the most health and economic consequences” Synopsis: “I downloaded the data as a CSV file and extracted the columns for event type, population health (fatalities and injuries), and property and crop damages. For the population data, I grouped the data by event type, summed the values and plotted them. For the economic damages, I multiplied the values in the damages columns by their exponents, then grouped the data by event type, took the sums and plotted again.”
Create my working directory. Download the data from the link. Load the data into R. Look at the head, structure and summary of the data to get a feel for the data architecture. Look at the column names.
# Set up the data directory and load the storm data.
# The directory that is created, the download target and the file that is
# read all share one relative path, so the analysis can be re-run from
# scratch without any manual download or machine-specific location.
DataDir <- 'Wk4_a'
if (!dir.exists(DataDir)) {
  dir.create(DataDir)
}
fileurl <- 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2'
StormFile <- file.path(DataDir, 'Storm.csv.bz2')
# Download only when the archive is not already present.
# mode = 'wb' keeps the compressed file intact on Windows.
if (!file.exists(StormFile)) {
  download.file(fileurl, destfile = StormFile, mode = 'wb')
}
# read.csv() opens bzip2-compressed files directly, no manual unpacking needed
StormData <- read.csv(StormFile, stringsAsFactors = FALSE)
# Optional first look at the data
#head(StormData)
#str(StormData)
#summary(StormData)
names(StormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Question: What type of events are most harmful with respect to population health? Events are coded by the categorical variable EVTYPE. I measured the harmfulness of events by the number of injuries and fatalities caused by each event type. I found the total fatalities for each event type as well as the total injuries for each event type.
DATA PROCESSING For Fatality:
# Total fatalities per event type.
# Columns are selected by name rather than by position, so a change in the
# column layout raises an error instead of silently summing another variable.
EvtFatality <- StormData[, c('EVTYPE', 'FATALITIES')]
# Group FATALITIES by EVTYPE and sum each group
FatalityTotals <- tapply(EvtFatality$FATALITIES, EvtFatality$EVTYPE, sum)
# Tidy data frame: one row per event type
DataFatality <- data.frame(Evtype = names(FatalityTotals),
                           FatalitySum = as.numeric(FatalityTotals))
#head(DataFatality)
# Find the index of the max value in the FatalitySum column
Ans <- which.max(DataFatality$FatalitySum)
# Use that index to identify the event type that has caused the most deaths
DataFatality[Ans, ]
## Evtype FatalitySum
## 834 TORNADO 5633
For Injuries:
# Total injuries per event type.
# Columns are selected by name rather than by position, so a change in the
# column layout raises an error instead of silently summing another variable.
EvtInjury <- StormData[, c('EVTYPE', 'INJURIES')]
# Group INJURIES by EVTYPE and sum each group
InjuryTotals <- tapply(EvtInjury$INJURIES, EvtInjury$EVTYPE, sum)
# Tidy data frame: one row per event type
DataInjury <- data.frame(Evtype = names(InjuryTotals),
                         InjurySum = as.numeric(InjuryTotals))
#head(DataInjury)
# Find the index of the max value in the InjurySum column
Ans <- which.max(DataInjury$InjurySum)
# Use that index to identify the event type that has caused the most injuries
DataInjury[Ans, ]
## Evtype InjurySum
## 834 TORNADO 91346
RESULT Summary: Tornadoes seem to be the most harmful to population health, as they cause the highest number of injuries (91346) as well as deaths (5633).
PLOTTING RESULT
# Stacked bar chart of the ten event types with the most casualties
# (injuries + fatalities).

# Per-measure rankings, ten largest of each
SortedDataFatality <- DataFatality[order(-DataFatality$FatalitySum), ]
FirstTenFatality <- head(SortedDataFatality, 10)
SortedDataInjury <- DataInjury[order(-DataInjury$InjurySum), ]
FirstTenInjury <- head(SortedDataInjury, 10)
# merge() pairs the injury and fatality totals by event type. Binding the two
# top-ten tables side by side would pair them by rank instead, which attaches
# fatality counts to the wrong event whenever the two rankings differ.
Stacked <- merge(DataInjury, DataFatality, by = 'Evtype')
Stacked <- Stacked[order(-(Stacked$InjurySum + Stacked$FatalitySum)), ]
Stacked <- head(Stacked, 10)
Data <- as.matrix(Stacked[, c('InjurySum', 'FatalitySum')])
#head(Data)
# Tick positions derived from the tallest stacked bar
YTicks <- pretty(c(0, max(rowSums(Data))))
# barplot() returns the bar midpoints, used below to place the x-axis labels
BarMids <- barplot(t(Data), col = c('red', 'blue'), las = 2, axes = FALSE,
                   xaxt = 'n', ylim = range(YTicks))
# Add y-axis
axis(side = 2, at = YTicks,
     labels = format(YTicks, scientific = FALSE, trim = TRUE))
# Add x-axis
axis(side = 1, at = BarMids, labels = Stacked$Evtype, cex.axis = .6,
     tick = FALSE, font = 2)
# Adding title
title(main = 'Events Most Harmful To Population Health', xlab = 'Event Type',
      cex.lab = 2, cex.main = 2)
# Legend order and colours follow the stacking order of the bars
legend('topright', legend = c('Injuries', 'Fatalities'),
       fill = c('red', 'blue'))
Summary: Tornadoes seem to be the most harmful to population health, as they cause the highest number of injuries (91346) as well as deaths (5633).
Question: What type of events have the greatest economic consequences? Events are coded by the categorical variable EVTYPE. I measured this by looking at property damage as well as crop damage. DATA PROCESSING
# Subset out just the property data: event type, damage amount and its
# magnitude code. Columns are selected by name so a changed column layout
# raises an error instead of silently picking other variables.
PropertyDamages <- StormData[, c('EVTYPE', 'PROPDMG', 'PROPDMGEXP')]
#head(PropertyDamages)
#str(PropertyDamages)
# PROPDMGEXP is multiplied by PROPDMG to get the cash that is lost.
# It uses different codes, which need decoding into numbers.
# The codes in use can be seen by running this
unique(PropertyDamages$PROPDMGEXP)
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-"
## [18] "1" "8"
# Decode PROPDMGEXP into numeric multipliers and total the property damage
# per event type.
#
# One lookup table replaces the chain of in-place replacements, so the result
# no longer depends on statement order or on numbers being coerced to strings.
# Letter codes are matched case-insensitively (h/H, k/K, m/M, b/B).
# Digits 1-8 are powers of ten.
# NOTE(review): '0' and the empty code map to 0, which discards the damage on
# those rows; reading '0' as 10^0 would give 1 instead - confirm the intent.
PropMultiplier <- c(
  'H' = 1e2, 'K' = 1e3, 'M' = 1e6, 'B' = 1e9,
  '0' = 0,
  '1' = 1e1, '2' = 1e2, '3' = 1e3, '4' = 1e4,
  '5' = 1e5, '6' = 1e6, '7' = 1e7, '8' = 1e8
)
PropCode <- toupper(PropertyDamages$PROPDMGEXP)
PropFactor <- unname(PropMultiplier[match(PropCode, names(PropMultiplier))])
# No exponent (''), the signs '+', '-' and '?', and any unlisted code get 0
PropFactor[is.na(PropFactor)] <- 0
PropertyDamages$PROPDMGEXP <- PropFactor
# Now multiply the Propdmgexp by the Propdmg column to get the PropCash
PropCash <- PropertyDamages$PROPDMG * PropertyDamages$PROPDMGEXP
# Add that to the dataframe
PropertyDamages['PropCash'] <- PropCash
#head(PropertyDamages)
#str(PropertyDamages)
# Group PropCash by event type and sum each group
PropertyTotals <- tapply(PropertyDamages$PropCash, PropertyDamages$EVTYPE, sum)
# Tidy data frame: one row per event type. The column name keeps its original
# spelling because the printed result below shows it.
DataPropertyDamages <- data.frame(Evtype = names(PropertyTotals),
                                  ProperyDamageSum = as.numeric(PropertyTotals))
#head(DataPropertyDamages)
Sorted <- DataPropertyDamages[order(-DataPropertyDamages$ProperyDamageSum), ]
FirstTenProperty <- head(Sorted, 10)
# Find the index of the max value in the ProperyDamageSum column
Ans <- which.max(DataPropertyDamages$ProperyDamageSum)
# Use that index to identify the event type with the most property damage
DataPropertyDamages[Ans, ]
## Evtype ProperyDamageSum
## 170 FLOOD 144657709800
# Subset out just the crop data: event type, damage amount and its magnitude
# code. Columns are selected by name so a changed column layout raises an
# error instead of silently picking other variables.
CropDamages <- StormData[, c('EVTYPE', 'CROPDMG', 'CROPDMGEXP')]
#head(CropDamages)
#str(CropDamages)
# CROPDMGEXP is multiplied by CROPDMG to get the cash that is lost.
# It uses different codes, which need decoding into numbers.
# The codes in use can be seen by running this
unique(CropDamages$CROPDMGEXP)
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
# Decode CROPDMGEXP into numeric multipliers and total the crop damage per
# event type.
#
# One lookup table replaces the chain of in-place replacements, so the result
# no longer depends on statement order or on numbers being coerced to strings.
# Letter codes are matched case-insensitively (k/K, m/M, b/B).
# Digits 1-8 are powers of ten.
# NOTE(review): '0' and the empty code map to 0, which discards the damage on
# those rows; reading '0' as 10^0 would give 1 instead - confirm the intent.
CropMultiplier <- c(
  'H' = 1e2, 'K' = 1e3, 'M' = 1e6, 'B' = 1e9,
  '0' = 0,
  '1' = 1e1, '2' = 1e2, '3' = 1e3, '4' = 1e4,
  '5' = 1e5, '6' = 1e6, '7' = 1e7, '8' = 1e8
)
CropCode <- toupper(CropDamages$CROPDMGEXP)
CropFactor <- unname(CropMultiplier[match(CropCode, names(CropMultiplier))])
# No exponent (''), '?' and any unlisted code get 0
CropFactor[is.na(CropFactor)] <- 0
CropDamages$CROPDMGEXP <- CropFactor
# Now multiply the Cropdmgexp by the Cropdmg column to get the CropCash
CropCash <- CropDamages$CROPDMG * CropDamages$CROPDMGEXP
# Add that to the dataframe
CropDamages['CropCash'] <- CropCash
#head(CropDamages)
#str(CropDamages)
# Group CropCash by event type and sum each group
CropTotals <- tapply(CropDamages$CropCash, CropDamages$EVTYPE, sum)
# Tidy data frame: one row per event type
DataCropDamages <- data.frame(Evtype = names(CropTotals),
                              CropDamageSum = as.numeric(CropTotals))
#head(DataCropDamages)
Sorted <- DataCropDamages[order(-DataCropDamages$CropDamageSum), ]
FirstTenCrop <- head(Sorted, 10)
# Find the index of the max value in the CropDamageSum column
Ans <- which.max(DataCropDamages$CropDamageSum)
# Use that index to identify the event type with the most crop damage
DataCropDamages[Ans, ]
## Evtype CropDamageSum
## 95 DROUGHT 13972566000
PLOTTING RESULT
# Stacked bar chart of the ten event types with the largest total damage
# (property + crop).

# Give both tables the same key and plain value names; columns are taken by
# position so this does not depend on the spelling of the value columns.
PropertyTotals <- setNames(DataPropertyDamages[, 1:2],
                           c('Evtype', 'PropertyDamages'))
CropTotals <- setNames(DataCropDamages[, 1:2], c('Evtype', 'CropDamages'))
# merge() pairs property and crop totals by event type. Binding the two
# top-ten tables side by side would pair them by rank instead, which attaches
# crop damage to the wrong event whenever the two rankings differ.
Stacked <- merge(PropertyTotals, CropTotals, by = 'Evtype')
Stacked <- Stacked[order(-(Stacked$PropertyDamages + Stacked$CropDamages)), ]
Stacked <- head(Stacked, 10)
Data <- as.matrix(Stacked[, c('PropertyDamages', 'CropDamages')])
#head(Data)
# Tick positions derived from the tallest stacked bar
YTicks <- pretty(c(0, max(rowSums(Data))))
# barplot() returns the bar midpoints, used below to place the x-axis labels
BarMids <- barplot(t(Data), col = c('red', 'blue'), las = 2, axes = FALSE,
                   xaxt = 'n', ylim = range(YTicks))
# Add y-axis
axis(side = 2, at = YTicks, labels = format(YTicks, scientific = TRUE))
# Add x-axis
axis(side = 1, at = BarMids, labels = Stacked$Evtype, cex.axis = .6,
     tick = FALSE, font = 2)
# Adding title
title(main = 'Events with greatest economic consequences', xlab = 'Event Type',
      cex.lab = 2, cex.main = 2)
# Legend colours match the bars: property is drawn in red, crop in blue
legend('topright', legend = c('Property Damage', 'Crop Damage'),
       fill = c('red', 'blue'))
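RESULT Summary: Floods seem to cause the most economic damage: they have the largest total property damage (144657709800), while drought causes the largest total crop damage (13972566000).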