This report reads only the first 150000 rows of the full data set.

My time is limited: in one day I leave for a six-day test at a local university where I will have no internet access, so I decided to do the assignment without cleaning the data.

What’s more, because of my computer's language settings, I cannot read the whole data set without getting an error message.

I’m sure I will do this assignment again since this version is not good.

Anyway, please evaluate my assignment fairly!

Instructions:

This report analyzes storm data from the National Weather Service. It answers two questions: 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

2. Across the United States, which types of events have the greatest economic consequences?

Data Processing

# Load the storm data, limited to the first 150000 rows of the CSV.
# The absolute Windows path below is specific to the author's machine.
small_data <- read.csv(
  file = "G:\\codelessons\\Data Science\\ASSIGNMENTS\\reproducible research\\assignment2\\repdata-data-StormData.csv",
  nrows = 150000
)
# Mean number of fatalities per record, grouped by event type (NAs ignored).
fatality_mean <- setNames(
  aggregate(
    x = small_data$FATALITIES,
    by = list(small_data$EVTYPE),
    FUN = mean,
    na.rm = TRUE
  ),
  c("event_type", "mean_of_all_cases")
)
fatality_mean
##   event_type mean_of_all_cases
## 1       HAIL         8.998e-05
## 2    TORNADO         1.289e-01
## 3  TSTM WIND         2.830e-03
# Total number of fatalities, grouped by event type (NAs ignored).
fatality_sum <- setNames(
  aggregate(
    x = small_data$FATALITIES,
    by = list(small_data$EVTYPE),
    FUN = sum,
    na.rm = TRUE
  ),
  c("event_type", "sum_of_all_cases")
)
fatality_sum
##   event_type sum_of_all_cases
## 1       HAIL                4
## 2    TORNADO             3630
## 3  TSTM WIND              219
# Mean number of injuries per record, grouped by event type (NAs ignored).
injury_mean <- setNames(
  aggregate(
    x = small_data$INJURIES,
    by = list(small_data$EVTYPE),
    FUN = mean,
    na.rm = TRUE
  ),
  c("event_type", "mean_of_all_cases")
)
injury_mean
##   event_type mean_of_all_cases
## 1       HAIL          0.005872
## 2    TORNADO          2.150458
## 3  TSTM WIND          0.036385
# Total number of injuries, grouped by event type (NAs ignored).
injury_sum <- setNames(
  aggregate(
    x = small_data$INJURIES,
    by = list(small_data$EVTYPE),
    FUN = sum,
    na.rm = TRUE
  ),
  c("event_type", "sum_of_all_cases")
)
injury_sum
##   event_type sum_of_all_cases
## 1       HAIL              261
## 2    TORNADO            60544
## 3  TSTM WIND             2816

plot of fatality_sum:

library(ggplot2)
# Bar chart: one red bar per event type, bar height = total fatalities.
plot_1 <- ggplot(
  data = fatality_sum,
  mapping = aes(x = event_type, y = sum_of_all_cases)
)
plot_1 +
  geom_bar(stat = "identity", fill = "red")

plot of chunk unnamed-chunk-1

plot of injury_sum:

# Bar chart: one red bar per event type, bar height = total injuries.
plot_2 <- ggplot(
  data = injury_sum,
  mapping = aes(x = event_type, y = sum_of_all_cases)
)
plot_2 +
  geom_bar(stat = "identity", fill = "red")

plot of chunk unnamed-chunk-2

Results: among the first 150000 rows analyzed, TORNADO is the most harmful event type.

Compute the mean property damage per event type

# Mean of the PROPDMG column per record, grouped by event type.
# na.rm = TRUE keeps this consistent with the fatality/injury summaries and
# prevents a single missing PROPDMG value from turning a group's mean into NA.
# NOTE(review): PROPDMG is used as-is; presumably it should be scaled by the
# PROPDMGEXP magnitude column before comparing event types -- TODO confirm.
property_damage_mean <- aggregate(
  x = small_data$PROPDMG,
  by = list(small_data$EVTYPE),
  FUN = mean,
  na.rm = TRUE
)
names(property_damage_mean) <- c("type", "num")
property_damage_mean
##        type   num
## 1      HAIL  0.00
## 2   TORNADO 55.84
## 3 TSTM WIND  0.00

Compute the total property damage per event type

# Total of the PROPDMG column, grouped by event type.
# na.rm = TRUE keeps this consistent with the fatality/injury summaries and
# prevents a single missing PROPDMG value from turning a group's total into NA.
# NOTE(review): PROPDMG is used as-is; presumably it should be scaled by the
# PROPDMGEXP magnitude column before comparing event types -- TODO confirm.
property_damage_sum <- aggregate(
  x = small_data$PROPDMG,
  by = list(small_data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
names(property_damage_sum) <- c("type", "num")
property_damage_sum
##        type     num
## 1      HAIL       0
## 2   TORNADO 1572058
## 3 TSTM WIND       0

Results: among the first 150000 rows analyzed, TORNADO causes the most property damage.