Project Details

This project analyzes the storms data set, which covers the years 1975 to 2015.
Questions that this project tries to answer:
1. Year with max. no. of storms
2. Max. Wind Speed and Avg. Speed of Storms in the year with max. no. of storms
3. Max. Pressure and Avg. Pressure of Storms in the year with max. no. of storms
4. Avg. Wind Speed & Pressure of Storms per year
5. Avg. Wind Speed & Pressure of Storms as per Category

Dataset Description

This data is a subset of the NOAA Atlantic hurricane database best track data, http://www.nhc.noaa.gov/data/#hurdat. The data includes the positions and attributes of 198 tropical storms, measured every six hours during the lifetime of a storm.

Usage

storms

Format

A tibble with 10,010 observations and 13 variables:

  - name: Storm name
  - year, month, day: Date of report
  - hour: Hour of report (in UTC)
  - lat, long: Location of storm center
  - status: Storm classification (Tropical Depression, Tropical Storm, or Hurricane)
  - category: Saffir-Simpson storm category (estimated from wind speed; -1 = Tropical Depression, 0 = Tropical Storm)
  - wind: Storm’s maximum sustained wind speed (in knots)
  - pressure: Air pressure at the storm’s center (in millibars)
  - ts_diameter: Diameter of the area experiencing tropical storm strength winds (34 knots or above)
  - hu_diameter: Diameter of the area experiencing hurricane strength winds (64 knots or above)
knitr::opts_chunk$set(fig.width=12,fig.height=8)

Loading Required Packages for Analysis

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Basic Details

nrow(storms)
## [1] 10010
ncol(storms)
## [1] 13

Creating Variables

# Placeholders for objects that the analysis below fills in.
# Total_Storms is preallocated with one integer slot per year (1975-2015) so
# the per-year count does not have to grow the vector one element at a time.
Total_Storms <- integer(41L)
i <- integer()
storms_year <- data.frame()
total <- numeric()

Exploring Data

# Count the distinct storm names in the data set and show the figure as a
# one-element named vector so the printed value carries its own label.
# NOTE(review): this counts distinct *names*; the per-year counts computed
# further down sum to more than this figure, which suggests names are reused
# across years -- confirm whether "storms" should mean name/year pairs here.
total <- length(unique(storms$name))
m <- matrix(total, dimnames = list("Total No. of Storms 1975-2015"))
drop(m)
## Total No. of Storms 1975-2015 
##                           198
table(storms$year)
## 
## 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 
##   86   52   53   54  301  161  164  105   79  236  263   70   80  259  356  354 
## 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 
##  131  185  129  206  660  315  154  413  210  318  370  285  422  410  498  190 
## 2007 2008 2009 2010 2011 2012 2013 2014 2015 
##  213  335  153  402  323  454  202  139  220
# View() opens a data viewer window, so only call it in an interactive
# session; a non-interactive run simply skips it.
if (interactive()) {
  View(storms[storms$year == 2005, ])
}

table(unique(storms[storms$year==2005,]$name))
## 
##     Beta    Emily  Epsilon Franklin    Gamma     Gert   Harvey  Katrina 
##        1        1        1        1        1        1        1        1 
##      Lee    Maria     Nate Nineteen  Ophelia Philippe     Rita     Stan 
##        1        1        1        1        1        1        1        1 
##    Tammy      Ten    Vince    Wilma     Zeta 
##        1        1        1        1        1
sum(table(unique(storms[storms$year==2005,]$name)))
## [1] 21
length(unique(storms[storms$year==2005,]$name))
## [1] 21

Finding no of storms per year

# Count the distinct storm names observed in each year from 1975 to 2015.
# vapply() builds the whole integer vector in one step (no index arithmetic,
# no element-by-element growth) and guarantees one integer per year.
storm_years <- 1975:2015
Total_Storms <- vapply(
  storm_years,
  function(yr) length(unique(storms$name[storms$year == yr])),
  integer(1)
)
Total_Storms[1:41]
##  [1]  3  2  3  4  7  8  5  5  4 10 10  4  3 10 11 12  9  7  6 11 21 10  7 12 10
## [26] 16 17 12 20 13 21  9 16 14 10 20 15 17 14  7 11
# Pair each year (1975-2015) with its storm count so the counts can be
# tabulated and plotted; the bare name on the second line prints the table.
storms_year<-data.frame(Year=1975:2015,No._of_Storms=Total_Storms[1:41])
storms_year
##    Year No._of_Storms
## 1  1975             3
## 2  1976             2
## 3  1977             3
## 4  1978             4
## 5  1979             7
## 6  1980             8
## 7  1981             5
## 8  1982             5
## 9  1983             4
## 10 1984            10
## 11 1985            10
## 12 1986             4
## 13 1987             3
## 14 1988            10
## 15 1989            11
## 16 1990            12
## 17 1991             9
## 18 1992             7
## 19 1993             6
## 20 1994            11
## 21 1995            21
## 22 1996            10
## 23 1997             7
## 24 1998            12
## 25 1999            10
## 26 2000            16
## 27 2001            17
## 28 2002            12
## 29 2003            20
## 30 2004            13
## 31 2005            21
## 32 2006             9
## 33 2007            16
## 34 2008            14
## 35 2009            10
## 36 2010            20
## 37 2011            15
## 38 2012            17
## 39 2013            14
## 40 2014             7
## 41 2015            11
  # Bar chart of the storm count per year; every bar is labelled with its
  # count and shaded by the same value.
  ggplot(storms_year, aes(x = Year, y = No._of_Storms, fill = No._of_Storms)) +
    geom_col() +
    geom_text(aes(label = No._of_Storms), vjust = -0.2, size = 3) +
    labs(x = "Year", y = "No. of Storms Per Year")

max(storms_year$No._of_Storms)
## [1] 21
print("Year with maximum no of storms:")
## [1] "Year with maximum no of storms:"
# Select every year that reaches the maximum count. The maximum is computed
# rather than hard-coded as 21, so ties are still listed and the filter keeps
# following the data.
storms_year[storms_year$No._of_Storms == max(storms_year$No._of_Storms), ]
##    Year No._of_Storms
## 21 1995            21
## 31 2005            21

Analyzing Storms in 2005

Max. Speed & Avg. Speed Data

# Keep only the 2005 observations for the per-storm analysis that follows.
Storm_2005 <- storms[storms$year == 2005, ]

# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(Storm_2005)
}

print("Names of Different Storms: ")
## [1] "Names of Different Storms: "
unique(Storm_2005$name)
##  [1] "Emily"    "Franklin" "Gert"     "Harvey"   "Ten"      "Katrina" 
##  [7] "Lee"      "Maria"    "Nate"     "Ophelia"  "Philippe" "Rita"    
## [13] "Nineteen" "Stan"     "Tammy"    "Vince"    "Wilma"    "Beta"    
## [19] "Gamma"    "Epsilon"  "Zeta"
print("Storm with max. wind speed in 2005: ")
## [1] "Storm with max. wind speed in 2005: "
as.data.frame(Storm_2005[which.max(Storm_2005$wind),])
##    name year month day hour  lat  long    status category wind pressure
## 1 Wilma 2005    10  19   12 17.3 -82.8 hurricane        5  160      882
##   ts_diameter hu_diameter
## 1    304.9567     74.8007
print("Storm's avg. wind speed in 2005: ")
## [1] "Storm's avg. wind speed in 2005: "
mean(Storm_2005$wind)
## [1] 59.36747
# Mean wind speed of each 2005 storm, with readable column names applied in
# the same step; the bare name prints the table.
dat1 <- setNames(
  aggregate(Storm_2005$wind, by = list(Storm_2005$name), FUN = mean),
  c("Storm", "AvgSpeed")
)
dat1
##       Storm AvgSpeed
## 1      Beta 56.94444
## 2     Emily 80.88889
## 3   Epsilon 58.55263
## 4  Franklin 44.84848
## 5     Gamma 33.09524
## 6      Gert 31.87500
## 7    Harvey 46.80000
## 8   Katrina 77.03125
## 9       Lee 28.46154
## 10    Maria 61.11111
## 11     Nate 58.50000
## 12 Nineteen 28.33333
## 13  Ophelia 56.14583
## 14 Philippe 47.80000
## 15     Rita 80.14286
## 16     Stan 39.70588
## 17    Tammy 35.62500
## 18      Ten 27.00000
## 19    Vince 43.00000
## 20    Wilma 91.93182
## 21     Zeta 43.75000
  # Average wind speed per 2005 storm. The storm name itself is mapped to the
  # x axis so the bars are labelled with names (as the axis title says) instead
  # of row positions 1..n; the tick labels are tilted so the names do not
  # overlap.
  ggplot(
    data = dat1,
    aes(x = Storm, y = signif(AvgSpeed, digits = 4), fill = Storm)
  ) +
    geom_col() +
    geom_text(aes(label = signif(AvgSpeed, digits = 4)), vjust = -0.2, size = 3) +
    xlab("Name of Storms in 2005") + ylab("Avg. Wind Speed") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

print("Max. Speed of a storm in the database: ")
## [1] "Max. Speed of a storm in the database: "
max(storms$wind)
## [1] 160
print("Storms with max. Speed in Database: ")
## [1] "Storms with max. Speed in Database: "
# Rows that reach the data set's top wind speed. The maximum is computed
# rather than hard-coded as 160, so the filter keeps following the data.
as.data.frame(storms[storms$wind == max(storms$wind), ])
##      name year month day hour  lat  long    status category wind pressure
## 1 Gilbert 1988     9  14    0 19.7 -83.8 hurricane        5  160      888
## 2   Wilma 2005    10  19   12 17.3 -82.8 hurricane        5  160      882
##   ts_diameter hu_diameter
## 1          NA          NA
## 2    304.9567     74.8007

Max. Pressure and Avg. Pressure Data

print("Storm with max. pressure in 2005: ")
## [1] "Storm with max. pressure in 2005: "
as.data.frame(Storm_2005[which.max(Storm_2005$pressure),])
##   name year month day hour  lat  long              status category wind
## 1 Gert 2005     7  23   18 19.3 -92.9 tropical depression       -1   25
##   pressure ts_diameter hu_diameter
## 1     1011           0           0
print("Storm's avg. pressure in 2005: ")
## [1] "Storm's avg. pressure in 2005: "
mean(Storm_2005$pressure)
## [1] 985.8614
# Mean central pressure of each 2005 storm, with readable column names applied
# in the same step; the bare name prints the table.
dat2 <- setNames(
  aggregate(Storm_2005$pressure, by = list(Storm_2005$name), FUN = mean),
  c("Storm", "AvgPressure")
)
dat2
##       Storm AvgPressure
## 1      Beta    989.7778
## 2     Emily    976.1111
## 3   Epsilon    989.3684
## 4  Franklin   1000.8788
## 5     Gamma   1005.3810
## 6      Gert   1007.6250
## 7    Harvey    997.4000
## 8   Katrina    963.8438
## 9       Lee   1008.2308
## 10    Maria    987.7500
## 11     Nate    991.5000
## 12 Nineteen   1007.8889
## 13  Ophelia    989.4583
## 14 Philippe    996.8400
## 15     Rita    961.3714
## 16     Stan    998.7059
## 17    Tammy   1003.8750
## 18      Ten   1008.6000
## 19    Vince    996.5000
## 20    Wilma    954.1818
## 21     Zeta   1001.2500
  # Average central pressure per 2005 storm. The storm name itself is mapped to
  # the x axis so the bars are labelled with names (as the axis title says)
  # instead of row positions 1..n; the tick labels are tilted so the names do
  # not overlap.
  ggplot(
    data = dat2,
    aes(x = Storm, y = signif(AvgPressure, digits = 4), fill = Storm)
  ) +
    geom_col() +
    geom_text(aes(label = signif(AvgPressure, digits = 4)), vjust = -0.2, size = 3) +
    xlab("Name of Storms in 2005") + ylab("Avg. Pressure") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  print("Max. Pressure of a storm in the database: ")
## [1] "Max. Pressure of a storm in the database: "
max(storms$pressure)
## [1] 1022
print("Storms with max. Pressure in Database: ")
## [1] "Storms with max. Pressure in Database: "
# Rows that reach the data set's highest pressure. The maximum is computed
# rather than hard-coded as 1022, so the filter keeps following the data.
as.data.frame(storms[storms$pressure == max(storms$pressure), ])
##       name year month day hour  lat  long              status category wind
## 1 AL072003 2003     7  26   12 32.3 -82.0 tropical depression       -1   20
## 2 AL072003 2003     7  26   18 32.8 -82.6 tropical depression       -1   15
## 3 AL072003 2003     7  27    0 33.0 -83.0 tropical depression       -1   15
##   pressure ts_diameter hu_diameter
## 1     1022          NA          NA
## 2     1022          NA          NA
## 3     1022          NA          NA

Finding Average Wind Speed & Pressure Per Year

# Mean wind speed of all observations, grouped by year; the bare name prints
# the resulting table (columns Group.1 and x).
speed_year <- with(storms, aggregate(x = wind, by = list(year), FUN = mean))
speed_year
##    Group.1        x
## 1     1975 50.87209
## 2     1976 59.90385
## 3     1977 53.96226
## 4     1978 40.46296
## 5     1979 48.67110
## 6     1980 53.72671
## 7     1981 56.61585
## 8     1982 49.47619
## 9     1983 46.96203
## 10    1984 51.37712
## 11    1985 50.85551
## 12    1986 42.92857
## 13    1987 40.62500
## 14    1988 48.45560
## 15    1989 57.65449
## 16    1990 47.25989
## 17    1991 47.74809
## 18    1992 57.10811
## 19    1993 48.56589
## 20    1994 35.33981
## 21    1995 55.88636
## 22    1996 57.73016
## 23    1997 48.05195
## 24    1998 61.58596
## 25    1999 61.38095
## 26    2000 54.65409
## 27    2001 51.20270
## 28    2002 46.80702
## 29    2003 58.69668
## 30    2004 62.04878
## 31    2005 59.36747
## 32    2006 49.68421
## 33    2007 53.49765
## 34    2008 55.29851
## 35    2009 54.15033
## 36    2010 55.87065
## 37    2011 52.66254
## 38    2012 52.94053
## 39    2013 41.50990
## 40    2014 63.99281
## 41    2015 49.40909
# Mean pressure of all observations, grouped by year; the bare name prints the
# resulting table (columns Group.1 and x).
pressure_year <- with(storms, aggregate(x = pressure, by = list(year), FUN = mean))
pressure_year
##    Group.1         x
## 1     1975  994.6279
## 2     1976  988.7692
## 3     1977  995.3585
## 4     1978 1005.8333
## 5     1979  994.8372
## 6     1980  995.2112
## 7     1981  993.7988
## 8     1982  996.3143
## 9     1983 1001.3671
## 10    1984  994.5508
## 11    1985  993.9658
## 12    1986 1002.7429
## 13    1987  999.6250
## 14    1988  994.1737
## 15    1989  989.3680
## 16    1990  998.4350
## 17    1991  995.8550
## 18    1992  991.4811
## 19    1993  997.0853
## 20    1994 1004.2864
## 21    1995  988.7106
## 22    1996  988.0381
## 23    1997  996.3506
## 24    1998  987.7845
## 25    1999  982.8143
## 26    2000  992.6950
## 27    2001  994.7378
## 28    2002  995.4982
## 29    2003  989.0687
## 30    2004  984.5634
## 31    2005  985.8614
## 32    2006  996.9789
## 33    2007  991.6761
## 34    2008  988.9224
## 35    2009  992.2157
## 36    2010  989.5000
## 37    2011  994.4241
## 38    2012  991.4339
## 39    2013 1002.6584
## 40    2014  983.7770
## 41    2015  995.1318
# Combine the two yearly summaries into one table. Both were grouped by the
# same year column, so their rows line up position by position.
avg_data <- data.frame(
  Year = speed_year[["Group.1"]],
  Avg_Speed = speed_year[["x"]],
  Avg_Pressure = pressure_year[["x"]]
)
avg_data
##    Year Avg_Speed Avg_Pressure
## 1  1975  50.87209     994.6279
## 2  1976  59.90385     988.7692
## 3  1977  53.96226     995.3585
## 4  1978  40.46296    1005.8333
## 5  1979  48.67110     994.8372
## 6  1980  53.72671     995.2112
## 7  1981  56.61585     993.7988
## 8  1982  49.47619     996.3143
## 9  1983  46.96203    1001.3671
## 10 1984  51.37712     994.5508
## 11 1985  50.85551     993.9658
## 12 1986  42.92857    1002.7429
## 13 1987  40.62500     999.6250
## 14 1988  48.45560     994.1737
## 15 1989  57.65449     989.3680
## 16 1990  47.25989     998.4350
## 17 1991  47.74809     995.8550
## 18 1992  57.10811     991.4811
## 19 1993  48.56589     997.0853
## 20 1994  35.33981    1004.2864
## 21 1995  55.88636     988.7106
## 22 1996  57.73016     988.0381
## 23 1997  48.05195     996.3506
## 24 1998  61.58596     987.7845
## 25 1999  61.38095     982.8143
## 26 2000  54.65409     992.6950
## 27 2001  51.20270     994.7378
## 28 2002  46.80702     995.4982
## 29 2003  58.69668     989.0687
## 30 2004  62.04878     984.5634
## 31 2005  59.36747     985.8614
## 32 2006  49.68421     996.9789
## 33 2007  53.49765     991.6761
## 34 2008  55.29851     988.9224
## 35 2009  54.15033     992.2157
## 36 2010  55.87065     989.5000
## 37 2011  52.66254     994.4241
## 38 2012  52.94053     991.4339
## 39 2013  41.50990    1002.6584
## 40 2014  63.99281     983.7770
## 41 2015  49.40909     995.1318
print("Year with max. avg Speed: ")
## [1] "Year with max. avg Speed: "
avg_data[which.max(avg_data$Avg_Speed),]
##    Year Avg_Speed Avg_Pressure
## 40 2014  63.99281      983.777
print("Year with max. avg Pressure: ")
## [1] "Year with max. avg Pressure: "
avg_data[which.max(avg_data$Avg_Pressure),]
##   Year Avg_Speed Avg_Pressure
## 4 1978  40.46296     1005.833
  # Yearly average wind speed as bars, each labelled with its value rounded to
  # four significant digits and shaded by the unrounded average.
  ggplot(
    avg_data,
    aes(x = Year, y = signif(Avg_Speed, digits = 4), fill = Avg_Speed)
  ) +
    geom_col() +
    geom_text(aes(label = signif(Avg_Speed, digits = 4)), vjust = -0.2, size = 3) +
    labs(x = "Year", y = "Avg. Speed")

  # Yearly average pressure as bars, each labelled with its value rounded to
  # four significant digits and shaded by the unrounded average.
  ggplot(
    avg_data,
    aes(x = Year, y = signif(Avg_Pressure, digits = 4), fill = Avg_Pressure)
  ) +
    geom_col() +
    geom_text(aes(label = signif(Avg_Pressure, digits = 4)), vjust = -0.2, size = 2.5) +
    labs(x = "Year", y = "Avg. Pressure")

Finding Average Wind Speed & Pressure as per category

# Mean wind speed of all observations, grouped by the category column; the
# bare name prints the resulting table (columns Group.1 and x).
speed_category <- with(storms, aggregate(x = wind, by = list(category), FUN = mean))
speed_category
##   Group.1         x
## 1      -1  27.26916
## 2       0  45.80037
## 3       1  70.91098
## 4       2  89.43471
## 5       3 104.64187
## 6       4 121.55172
## 7       5 145.07353
# Mean pressure of all observations, grouped by the category column; the bare
# name prints the resulting table (columns Group.1 and x).
pressure_category <- with(
  storms,
  aggregate(x = pressure, by = list(category), FUN = mean)
)
pressure_category
##   Group.1         x
## 1      -1 1007.6259
## 2       0  999.3291
## 3       1  981.5181
## 4       2  967.4729
## 5       3  953.5289
## 6       4  939.5345
## 7       5  916.4265
# Combine the two per-category summaries into one table. Both were grouped by
# the same category column, so their rows line up position by position.
avg_data_cat <- data.frame(
  Category = speed_category[["Group.1"]],
  Avg_Speed = speed_category[["x"]],
  Avg_Pressure = pressure_category[["x"]]
)
avg_data_cat
##   Category Avg_Speed Avg_Pressure
## 1       -1  27.26916    1007.6259
## 2        0  45.80037     999.3291
## 3        1  70.91098     981.5181
## 4        2  89.43471     967.4729
## 5        3 104.64187     953.5289
## 6        4 121.55172     939.5345
## 7        5 145.07353     916.4265
  # Average wind speed per category as bars, each labelled with its value
  # rounded to four significant digits and shaded by category.
  ggplot(
    avg_data_cat,
    aes(x = Category, y = signif(Avg_Speed, digits = 4), fill = Category)
  ) +
    geom_col() +
    geom_text(aes(label = signif(Avg_Speed, digits = 4)), vjust = -0.2, size = 3) +
    labs(x = "Category", y = "Avg. Speed")

  # Average pressure per category as bars, each labelled with its value
  # rounded to four significant digits and shaded by category.
  ggplot(
    avg_data_cat,
    aes(x = Category, y = signif(Avg_Pressure, digits = 4), fill = Category)
  ) +
    geom_col() +
    geom_text(aes(label = signif(Avg_Pressure, digits = 4)), vjust = -0.2, size = 2.5) +
    labs(x = "Category", y = "Avg. Pressure")

Sources

The data used in this project is available as part of the ‘dplyr’ package.