# Fetch the compressed storm-data CSV and read it into `storms`.
# The archive is downloaded only when it is not already on disk (delete the
# file to force a fresh download), and mode = "wb" is set explicitly so the
# compressed file is written as binary regardless of platform defaults.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_archive <- "C:/Users/sanjayx/Desktop/coursera/StormData.csv.bz2"
if (!file.exists(storm_archive)) {
  download.file(storm_url, destfile = storm_archive, mode = "wb")
}
# NOTE(review): reading a .bz2 file directly with fread presumably relies on
# an extra decompression package being installed — confirm.
storms <- data.table::fread(storm_archive)
# List the column names of the loaded table.
names(storms)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Number of rows and columns in the loaded table.
dim(storms)
## [1] 902297 37
# Per-column summary of the loaded table (pasted output follows).
summary(storms)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE
## Min. : 1.0 Length:902297 Length:902297 Length:902297
## 1st Qu.:19.0 Class :character Class :character Class :character
## Median :30.0 Mode :character Mode :character Mode :character
## Mean :31.2
## 3rd Qu.:45.0
## Max. :95.0
##
## COUNTY COUNTYNAME STATE EVTYPE
## Min. : 0.0 Length:902297 Length:902297 Length:902297
## 1st Qu.: 31.0 Class :character Class :character Class :character
## Median : 75.0 Mode :character Mode :character Mode :character
## Mean :100.6
## 3rd Qu.:131.0
## Max. :873.0
##
## BGN_RANGE BGN_AZI BGN_LOCATI
## Min. : 0.000 Length:902297 Length:902297
## 1st Qu.: 0.000 Class :character Class :character
## Median : 0.000 Mode :character Mode :character
## Mean : 1.484
## 3rd Qu.: 1.000
## Max. :3749.000
##
## END_DATE END_TIME COUNTY_END COUNTYENDN
## Length:902297 Length:902297 Min. :0 Mode:logical
## Class :character Class :character 1st Qu.:0 NA's:902297
## Mode :character Mode :character Median :0
## Mean :0
## 3rd Qu.:0
## Max. :0
##
## END_RANGE END_AZI END_LOCATI
## Min. : 0.0000 Length:902297 Length:902297
## 1st Qu.: 0.0000 Class :character Class :character
## Median : 0.0000 Mode :character Mode :character
## Mean : 0.9862
## 3rd Qu.: 0.0000
## Max. :925.0000
##
## LENGTH WIDTH F MAG
## Min. : 0.0000 Min. : 0.000 Min. :0.0 Min. : 0.0
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.:0.0 1st Qu.: 0.0
## Median : 0.0000 Median : 0.000 Median :1.0 Median : 50.0
## Mean : 0.2301 Mean : 7.503 Mean :0.9 Mean : 46.9
## 3rd Qu.: 0.0000 3rd Qu.: 0.000 3rd Qu.:1.0 3rd Qu.: 75.0
## Max. :2315.0000 Max. :4400.000 Max. :5.0 Max. :22000.0
## NA's :843563
## FATALITIES INJURIES PROPDMG
## Min. : 0.0000 Min. : 0.0000 Min. : 0.00
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.00
## Median : 0.0000 Median : 0.0000 Median : 0.00
## Mean : 0.0168 Mean : 0.1557 Mean : 12.06
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.50
## Max. :583.0000 Max. :1700.0000 Max. :5000.00
##
## PROPDMGEXP CROPDMG CROPDMGEXP
## Length:902297 Min. : 0.000 Length:902297
## Class :character 1st Qu.: 0.000 Class :character
## Mode :character Median : 0.000 Mode :character
## Mean : 1.527
## 3rd Qu.: 0.000
## Max. :990.000
##
## WFO STATEOFFIC ZONENAMES LATITUDE
## Length:902297 Length:902297 Length:902297 Min. : 0
## Class :character Class :character Class :character 1st Qu.:2802
## Mode :character Mode :character Mode :character Median :3540
## Mean :2875
## 3rd Qu.:4019
## Max. :9706
## NA's :47
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## Min. :-14451 Min. : 0 Min. :-14455 Length:902297
## 1st Qu.: 7247 1st Qu.: 0 1st Qu.: 0 Class :character
## Median : 8707 Median : 0 Median : 0 Mode :character
## Mean : 6940 Mean :1452 Mean : 3509
## 3rd Qu.: 9605 3rd Qu.:3549 3rd Qu.: 8735
## Max. : 17124 Max. :9706 Max. :106220
## NA's :40
## REFNUM
## Min. : 1
## 1st Qu.:225575
## Median :451149
## Mean :451149
## 3rd Qu.:676723
## Max. :902297
##
# Write the parsed table to an .RData snapshot, then read that snapshot back
# into the workspace under the same name.
save(list = "storms", file = "C:/Users/sanjayx/Desktop/coursera/storms.RData")
load("C:/Users/sanjayx/Desktop/coursera/storms.RData")
# TRUE for rows where neither the event type nor the fatality count is missing.
ok <- !is.na(storms$EVTYPE) & !is.na(storms$FATALITIES)
# Number of rows with a missing value in either column.
sum(!ok)
## [1] 0
# Total fatalities per event type; plot the five event types with the most.
fatality <- storms[, c("EVTYPE", "FATALITIES")]
# NOTE(review): EVTYPE is grouped verbatim; if the raw labels contain
# case/spelling variants of the same event they are summed separately — confirm.
# Arguments after FUN are forwarded to sum(), so only `na.rm` belongs here.
sum_fatal <- aggregate(
  fatality$FATALITIES,
  list(fatality$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
# The result has the grouping column `Group.1` and the summed column `x`.
sum_fatal_top_5 <- head(sum_fatal[order(sum_fatal$x, decreasing = TRUE), ], 5)
barplot(
  sum_fatal_top_5$x,
  names.arg = sum_fatal_top_5$Group.1,
  main = "Fatalities by event type",
  ylab = "Fatalities"
)
# Total injuries per event type; plot the five event types with the most.
injury <- storms[, c("EVTYPE", "INJURIES")]
# Arguments after FUN are forwarded to sum(), so only `na.rm` belongs here.
sum_injury <- aggregate(
  injury$INJURIES,
  list(injury$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
# The result has the grouping column `Group.1` and the summed column `x`.
sum_injury_top_5 <- head(sum_injury[order(sum_injury$x, decreasing = TRUE), ], 5)
barplot(
  sum_injury_top_5$x,
  names.arg = sum_injury_top_5$Group.1,
  main = "Injuries by event type",
  ylab = "Injuries"
)
##Across the United States, which types of events have the greatest economic consequences?
unique(storms$PROPDMGEXP)
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-"
## [18] "1" "8"
library(plyr)
# Convert each PROPDMGEXP code to a numeric multiplier with a direct lookup.
# The two vectors below are positionally paired: code i maps to multiplier i.
prop_exp_codes <- c(
  "h", "H", "K", "m", "M", "B",
  "", "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "+", "-", "?"
)
prop_exp_multipliers <- c(
  1e2, 1e2, 1e3, 1e6, 1e6, 1e9,
  1, 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8,
  0, 0, 0
)
prop_exp_index <- match(storms$PROPDMGEXP, prop_exp_codes)
# A code missing from the table becomes NA; say so instead of letting it
# slip through (e.g. an unlisted digit being taken as a literal multiplier).
if (anyNA(prop_exp_index)) {
  warning(
    "Unmapped PROPDMGEXP code(s): ",
    paste(unique(storms$PROPDMGEXP[is.na(prop_exp_index)]), collapse = ", "),
    call. = FALSE
  )
}
propexp <- prop_exp_multipliers[prop_exp_index]
# PROPDMGEXP is overwritten with the numeric multiplier, so this step is meant
# to run once per freshly loaded table.
storms$PROPDMGEXP <- propexp
storms$PROPDMGTOTAL <- storms$PROPDMG * storms$PROPDMGEXP
unique(storms$CROPDMGEXP)
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
# Convert each CROPDMGEXP code to a numeric multiplier with a direct lookup.
# The two vectors below are positionally paired: code i maps to multiplier i.
crop_exp_codes <- c(
  "k", "K", "m", "M", "B",
  "", "0", "2",
  "?"
)
crop_exp_multipliers <- c(
  1e3, 1e3, 1e6, 1e6, 1e9,
  1, 1, 1e2,
  0
)
crop_exp_index <- match(storms$CROPDMGEXP, crop_exp_codes)
# A code missing from the table becomes NA; say so instead of letting it
# slip through (e.g. an unlisted digit being taken as a literal multiplier).
if (anyNA(crop_exp_index)) {
  warning(
    "Unmapped CROPDMGEXP code(s): ",
    paste(unique(storms$CROPDMGEXP[is.na(crop_exp_index)]), collapse = ", "),
    call. = FALSE
  )
}
cropexp <- crop_exp_multipliers[crop_exp_index]
# CROPDMGEXP is overwritten with the numeric multiplier, so this step is meant
# to run once per freshly loaded table.
storms$CROPDMGEXP <- cropexp
storms$CROPDMGTOTAL <- storms$CROPDMG * storms$CROPDMGEXP
# Combined property + crop damage per row, summed per event type; plot the
# five event types with the largest totals.
# The columns are referenced by their full names: a truncated name after `$`
# only resolves through partial matching, which is fragile.
storms$TOTALDMG <- storms$PROPDMGTOTAL + storms$CROPDMGTOTAL
damage <- storms[, c("EVTYPE", "TOTALDMG")]
# Arguments after FUN are forwarded to sum(), so only `na.rm` belongs here.
sum_damage <- aggregate(
  damage$TOTALDMG,
  list(damage$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
# The result has the grouping column `Group.1` and the summed column `x`.
sum_damage_top_5 <- head(sum_damage[order(sum_damage$x, decreasing = TRUE), ], 5)
barplot(
  sum_damage_top_5$x,
  names.arg = sum_damage_top_5$Group.1,
  main = "Damage by event type",
  ylab = "Damage"
)