This report reads only the first 150,000 rows of the full data set.
My time is limited: starting in one day, I will be taking a test at a local university for six days with no internet access, so I decided to do the assignment without cleaning the data.
What’s more, because of the language setting of my computer, I cannot read the whole data set without getting an error message.
I’m sure I will redo this assignment, since this version is not good.
In any case, please evaluate my assignment fairly!
Instructions:
This report analyzes a data set from the National Weather Service. It answers two questions: 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?
Data Processing
# Load the storm CSV; `nrows` caps the read at the first 150000 data rows.
small_data <- read.csv(
  file = "G:\\codelessons\\Data Science\\ASSIGNMENTS\\reproducible research\\assignment2\\repdata-data-StormData.csv",
  nrows = 150000
)
# Mean of FATALITIES within each EVTYPE group, skipping missing values.
fatality_mean <- setNames(
  aggregate(
    small_data$FATALITIES,
    by = list(small_data$EVTYPE),
    FUN = mean,
    na.rm = TRUE
  ),
  c("event_type", "mean_of_all_cases")
)
fatality_mean
## event_type mean_of_all_cases
## 1 HAIL 8.998e-05
## 2 TORNADO 1.289e-01
## 3 TSTM WIND 2.830e-03
# Total of FATALITIES within each EVTYPE group, skipping missing values.
fatality_sum <- setNames(
  aggregate(
    small_data$FATALITIES,
    by = list(small_data$EVTYPE),
    FUN = sum,
    na.rm = TRUE
  ),
  c("event_type", "sum_of_all_cases")
)
fatality_sum
## event_type sum_of_all_cases
## 1 HAIL 4
## 2 TORNADO 3630
## 3 TSTM WIND 219
# Mean of INJURIES within each EVTYPE group, skipping missing values.
injury_mean <- setNames(
  aggregate(
    small_data$INJURIES,
    by = list(small_data$EVTYPE),
    FUN = mean,
    na.rm = TRUE
  ),
  c("event_type", "mean_of_all_cases")
)
injury_mean
## event_type mean_of_all_cases
## 1 HAIL 0.005872
## 2 TORNADO 2.150458
## 3 TSTM WIND 0.036385
# Total of INJURIES within each EVTYPE group, skipping missing values.
injury_sum <- setNames(
  aggregate(
    small_data$INJURIES,
    by = list(small_data$EVTYPE),
    FUN = sum,
    na.rm = TRUE
  ),
  c("event_type", "sum_of_all_cases")
)
injury_sum
## event_type sum_of_all_cases
## 1 HAIL 261
## 2 TORNADO 60544
## 3 TSTM WIND 2816
plot of fatality_sum:
library(ggplot2)
# Red bar chart of fatality_sum: one bar per event type, bar height taken
# directly from sum_of_all_cases (stat = "identity").
plot_1 <- ggplot(
  data = fatality_sum,
  mapping = aes(x = event_type, y = sum_of_all_cases)
)
plot_1 + geom_bar(stat = "identity", fill = "red")
plot of injury_sum:
# Red bar chart of injury_sum: one bar per event type, bar height taken
# directly from sum_of_all_cases (stat = "identity").
plot_2 <- ggplot(
  data = injury_sum,
  mapping = aes(x = event_type, y = sum_of_all_cases)
)
plot_2 + geom_bar(stat = "identity", fill = "red")
Compute the mean property damage
# Mean of PROPDMG within each EVTYPE group.
# na.rm = TRUE matches the fatality/injury aggregates and keeps a single
# missing PROPDMG value from turning a whole group's mean into NA.
# NOTE(review): PROPDMG is presumably only the numeric part of the damage
# figure, with its magnitude (K/M/B) held in a separate PROPDMGEXP column --
# confirm against the Storm Data documentation before reading these as dollars.
property_damage_mean <- aggregate(
  small_data$PROPDMG,
  by = list(small_data$EVTYPE),
  FUN = mean,
  na.rm = TRUE
)
names(property_damage_mean) <- c("type", "num")
property_damage_mean
## type num
## 1 HAIL 0.00
## 2 TORNADO 55.84
## 3 TSTM WIND 0.00
Compute the total property damage
# Total of PROPDMG within each EVTYPE group.
# na.rm = TRUE matches the fatality/injury aggregates and keeps a single
# missing PROPDMG value from turning a whole group's total into NA.
# NOTE(review): PROPDMG is presumably only the numeric part of the damage
# figure, with its magnitude (K/M/B) held in a separate PROPDMGEXP column --
# confirm against the Storm Data documentation before reading these as dollars.
property_damage_sum <- aggregate(
  small_data$PROPDMG,
  by = list(small_data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
names(property_damage_sum) <- c("type", "num")
property_damage_sum
## type num
## 1 HAIL 0
## 2 TORNADO 1572058
## 3 TSTM WIND 0