In this analysis, we just used the basic slicing methods to find out the worst natural event in terms of human health and economy. Human health part was rather easy because in the economical part we need to consider the units of the numerical values, for instance is the given number thousands of dollars, or billions of dollars.
First we will start by reading and storing our data.
data <- read.csv("repdata-data-StormData.csv.bz2")
Regarding the questions we are trying to answer we will only keep few of the columns from the original data. That’s why we will create two data sets each of which is dealing with one of the questions.
health_data <- data.frame("EVTYPE"=data$EVTYPE, "FATALITIES"=data$FATALITIES, "INJURIES"=data$INJURIES)
economic_data <- data.frame("EVTYPE"=data$EVTYPE, "PROPDMG"=data$PROPDMG, "PROPDMGEXP"=data$PROPDMGEXP, "CROPDMG"=data$CROPDMG, "CROPDMGEXP"=data$CROPDMGEXP)
First we will add up the whole fatalities groupped by the event type.
sum_fatal_by_type <- tapply(health_data$FATALITIES,health_data$EVTYPE,sum)
fatal_health_data <- data.frame(
EVTYPE = names(sum_fatal_by_type),
FATALITIES = as.numeric(sum_fatal_by_type)
)
Now we will add injuries column to this data.
sum_inj_by_type <- tapply(health_data$INJURIES,health_data$EVTYPE,sum)
total_health_data <- cbind(fatal_health_data,as.numeric(sum_inj_by_type))
names(total_health_data) <- c("EVTYPE","FATALITIES","INJURIES")
Now we will add up the fatalities and injuries in this table to find out the total casulties occur in terms of health by the event type. After doing so we will drop the rows that will have zero on this summation coulmn as they contribute nothing to this part of the analysis.
total_health_data$SUM <- total_health_data$FATALITIES+total_health_data$INJURIES
total_health_data <- total_health_data[total_health_data$SUM!=0,]
Now we will take a look at the scatter plot of this data, whose \(x\)-axis is just the index of the row and the \(y\)-axis is the summation column. We will use logarithmic graph as there might be big differences between the data points.
plot(log(total_health_data$SUM), ylab="Total Casualties (in log)", main="Plot of Total Casualties vs Index")
Now we will find where the maximum number occurs.
total_health_data$EVTYPE[which.max(total_health_data$SUM)]
## [1] "TORNADO"
Therefore, tornado is the most harmful event for people’s health, we can check this in our graph as well.
plot(log(total_health_data$SUM), ylab="Total Casualties (in log)", main="Plot of Total Casualties vs Index")
abline(v=which.max(total_health_data$SUM),col="red",lty=2)
Now we will deal with the second question, which deals with the economic effects of the natural events. First we will create a list that take the values of the total property damage occured by each of the event type.
property_dmg <- numeric(0)
for (type in unique(economic_data$EVTYPE)){
type_data <- economic_data[economic_data$EVTYPE==type,]
sum <- 0
for (i in 1:length(type_data$PROPDMGEXP)){
if (type_data$PROPDMGEXP[i] %in% c("K","3")){
sum <- sum + 1000*type_data$PROPDMG[i]
}else if (type_data$PROPDMGEXP[i] %in% c("M","6","m")){
sum <- sum + 1000000*type_data$PROPDMG[i]
}else if (type_data$PROPDMGEXP[i] %in% c("B")){
sum <- sum + 1000000000*type_data$PROPDMG[i]
}else if (type_data$PROPDMGEXP[i] %in% c("5")){
sum <- sum + 100000*type_data$PROPDMG[i]
}else if (type_data$PROPDMGEXP[i] %in% c("4")){
sum <- sum + 10000*type_data$PROPDMG[i]
}else if (type_data$PROPDMGEXP[i] %in% c("2","h","H")){
sum <- sum + 100*type_data$PROPDMG[i]
}else if (type_data$PROPDMGEXP[i] %in% c("7")){
sum <- sum + 10000000*type_data$PROPDMG[i]
}else if (type_data$PROPDMGEXP[i] %in% c("8")){
sum <- sum + 100000000*type_data$PROPDMG[i]
}else {
sum <- sum + 100000000*type_data$PROPDMG[i]
}
}
property_dmg <- c(property_dmg, sum)
}
Now we will do the same for crop damages.
crop_dmg <- numeric(0)
for (type in unique(economic_data$EVTYPE)){
type_data <- economic_data[economic_data$EVTYPE==type,]
sum <- 0
for (i in 1:length(type_data$CROPDMGEXP)){
if (type_data$CROPDMGEXP[i] %in% c("K","3")){
sum <- sum + 1000*type_data$CROPDMG[i]
}else if (type_data$CROPDMGEXP[i] %in% c("M","6","m")){
sum <- sum + 1000000*type_data$CROPDMG[i]
}else if (type_data$CROPDMGEXP[i] %in% c("B")){
sum <- sum + 1000000000*type_data$CROPDMG[i]
}else if (type_data$CROPDMGEXP[i] %in% c("5")){
sum <- sum + 100000*type_data$CROPDMG[i]
}else if (type_data$CROPDMGEXP[i] %in% c("4")){
sum <- sum + 10000*type_data$CROPDMG[i]
}else if (type_data$CROPDMGEXP[i] %in% c("2","h","H")){
sum <- sum + 100*type_data$CROPDMG[i]
}else if (type_data$CROPDMGEXP[i] %in% c("7")){
sum <- sum + 10000000*type_data$CROPDMG[i]
}else if (type_data$CROPDMGEXP[i] %in% c("8")){
sum <- sum + 100000000*type_data$CROPDMG[i]
}else {
sum <- sum + 100000000*type_data$CROPDMG[i]
}
}
crop_dmg <- c(crop_dmg, sum)
}
Now let’s create list out of these damage data.
total_economic_data <- data.frame(EVTYPE=unique(economic_data$EVTYPE),TOTPROPDMG=property_dmg,TOTCROPDMG=crop_dmg)
Now we will create another column obtained by adding the two different damage values. Later we will drop the rows whose summation column is just zero
total_economic_data$SUM <- total_economic_data$TOTPROPDMG+total_economic_data$TOTCROPDMG
total_economic_data <- total_economic_data[total_economic_data$SUM!=0,]
Now we will take a look at the scatter plot of this data, whose \(x\)-axis is just the index of the row and the \(y\)-axis is the summation column. We will use logarithmic graph as there might be big differences between the data points.
plot(log(total_economic_data$SUM), ylab="Total Damage (in log)", main="Plot of Total Damage vs Index")
Now we will find where the maximum number occurs.
total_economic_data$EVTYPE[which.max(total_economic_data$SUM)]
## [1] "THUNDERSTORM WINDS"
Therefore, thunderstorm winds are the most harmful event for economy, we can check this in our graph as well.
plot(log(total_economic_data$SUM), ylab="Total Damage (in log)", main="Plot of Total Damage vs Index")
abline(v=which.max(total_economic_data$SUM),col="red",lty=2)
Therefore, by our analysis we found out that tornados causes the worst problem in terms of people’s health by causing 5633 deaths and over 91000 injures, whereas thunderstorm winds cause the worst problem in terms of economy by causing over 602 billion dollars property damage and over 9 billion dollars crop damage.