Tropical Storm Data Wrangling

##task 1.1

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.1
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.1
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.1
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Task 1.1: take a working reference to dplyr's built-in `storms` tibble and
# preview its columns. `<-` is the conventional R assignment operator.
# NOTE(review): `data` masks utils::data() for the rest of the session; the
# name is kept because later code refers to it.
data <- storms
glimpse(data)
## Rows: 19,537
## Columns: 13
## $ name                         <chr> "Amy", "Amy", "Amy", "Amy", "Amy", "Amy",…
## $ year                         <dbl> 1975, 1975, 1975, 1975, 1975, 1975, 1975,…
## $ month                        <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ day                          <int> 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 2…
## $ hour                         <dbl> 0, 6, 12, 18, 0, 6, 12, 18, 0, 6, 12, 18,…
## $ lat                          <dbl> 27.5, 28.5, 29.5, 30.5, 31.5, 32.4, 33.3,…
## $ long                         <dbl> -79.0, -79.0, -79.0, -79.0, -78.8, -78.7,…
## $ status                       <fct> tropical depression, tropical depression,…
## $ category                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wind                         <int> 25, 25, 25, 25, 25, 25, 25, 30, 35, 40, 4…
## $ pressure                     <int> 1013, 1013, 1013, 1013, 1012, 1012, 1011,…
## $ tropicalstorm_force_diameter <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ hurricane_force_diameter     <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Base-R structure summary of the same tibble: dimensions, classes, and the
# storage type plus leading values of every column.
str(data)
## tibble [19,537 × 13] (S3: tbl_df/tbl/data.frame)
##  $ name                        : chr [1:19537] "Amy" "Amy" "Amy" "Amy" ...
##  $ year                        : num [1:19537] 1975 1975 1975 1975 1975 ...
##  $ month                       : num [1:19537] 6 6 6 6 6 6 6 6 6 6 ...
##  $ day                         : int [1:19537] 27 27 27 27 28 28 28 28 29 29 ...
##  $ hour                        : num [1:19537] 0 6 12 18 0 6 12 18 0 6 ...
##  $ lat                         : num [1:19537] 27.5 28.5 29.5 30.5 31.5 32.4 33.3 34 34.4 34 ...
##  $ long                        : num [1:19537] -79 -79 -79 -79 -78.8 -78.7 -78 -77 -75.8 -74.8 ...
##  $ status                      : Factor w/ 9 levels "disturbance",..: 7 7 7 7 7 7 7 7 8 8 ...
##  $ category                    : num [1:19537] NA NA NA NA NA NA NA NA NA NA ...
##  $ wind                        : int [1:19537] 25 25 25 25 25 25 25 30 35 40 ...
##  $ pressure                    : int [1:19537] 1013 1013 1013 1013 1012 1012 1011 1006 1004 1002 ...
##  $ tropicalstorm_force_diameter: int [1:19537] NA NA NA NA NA NA NA NA NA NA ...
##  $ hurricane_force_diameter    : int [1:19537] NA NA NA NA NA NA NA NA NA NA ...

##task 1.2

##The storms dataset contains 19,537 observations across 13 variables. Each record corresponds to a storm at a particular date and time, providing details such as the storm’s name, position, wind speed, pressure, and status. ##Some data quality concerns include missing values (the output above shows NA in the category, tropicalstorm_force_diameter, and hurricane_force_diameter columns) and storms that change status over time, which must be handled carefully during analysis.

##task 2.1 (filtering and selecting)

# Task 2.1: keep only observations recorded after 2000 whose status is
# "hurricane", then reduce the table to the storm name, the date parts,
# the category and the wind speed.
hurricane_subset <- storms %>%
  filter(year > 2000 & status == "hurricane") %>%
  select(c(name, year, month, day, category, wind))
print(hurricane_subset)
## # A tibble: 2,506 × 6
##    name   year month   day category  wind
##    <chr> <dbl> <dbl> <int>    <dbl> <int>
##  1 Erin   2001     9     9        1    75
##  2 Erin   2001     9     9        2    90
##  3 Erin   2001     9     9        2    95
##  4 Erin   2001     9     9        3   105
##  5 Erin   2001     9    10        3   105
##  6 Erin   2001     9    10        3   105
##  7 Erin   2001     9    10        3   100
##  8 Erin   2001     9    10        2    90
##  9 Erin   2001     9    11        1    80
## 10 Erin   2001     9    11        1    80
## # ℹ 2,496 more rows

##task 2.2 (mutating)

# Task 2.2: append `wind_kph`, the wind speed expressed in km/h.
# 1.852 is the knots-to-km/h factor (1 nautical mile = 1.852 km).
hurricane_subset <- mutate(hurricane_subset, wind_kph = 1.852 * wind)
print(hurricane_subset)
## # A tibble: 2,506 × 7
##    name   year month   day category  wind wind_kph
##    <chr> <dbl> <dbl> <int>    <dbl> <int>    <dbl>
##  1 Erin   2001     9     9        1    75     139.
##  2 Erin   2001     9     9        2    90     167.
##  3 Erin   2001     9     9        2    95     176.
##  4 Erin   2001     9     9        3   105     194.
##  5 Erin   2001     9    10        3   105     194.
##  6 Erin   2001     9    10        3   105     194.
##  7 Erin   2001     9    10        3   100     185.
##  8 Erin   2001     9    10        2    90     167.
##  9 Erin   2001     9    11        1    80     148.
## 10 Erin   2001     9    11        1    80     148.
## # ℹ 2,496 more rows

##task 2.3 (summarizing)

# Task 2.3: one row per (year, storm name) for hurricane-status observations
# from 2010 onward, holding that storm's peak wind and lowest pressure.
# Rows are ordered by year, strongest wind first within each year.
storm_of_the_century <- storms %>%
  filter(status == "hurricane" & year >= 2010) %>%
  group_by(year, name) %>%
  summarise(
    max_wind = max(wind, na.rm = TRUE),
    min_pressure = min(pressure, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  # Remove the remaining `year` grouping so the sort below is global.
  ungroup() %>%
  arrange(year, desc(max_wind))
print(storm_of_the_century)
## # A tibble: 102 × 4
##     year name     max_wind min_pressure
##    <dbl> <chr>       <int>        <int>
##  1  2010 Igor          135          924
##  2  2010 Earl          125          927
##  3  2010 Julia         120          948
##  4  2010 Danielle      115          942
##  5  2010 Karl          110          956
##  6  2010 Alex           95          946
##  7  2010 Paula          90          981
##  8  2010 Richard        85          977
##  9  2010 Tomas          85          982
## 10  2010 Lisa           75          982
## # ℹ 92 more rows

##task 3.1 (pivoting)

# Task 3.1: reshape to long form. Each observation becomes two rows, one for
# `wind` and one for `pressure`, with the measurement name in `metric` and
# the measurement in `value`; `category` is carried along unchanged.
storm_long <- storms %>%
  select(category, wind, pressure) %>%
  pivot_longer(!category, names_to = "metric", values_to = "value")
print(storm_long)
## # A tibble: 39,074 × 3
##    category metric   value
##       <dbl> <chr>    <int>
##  1       NA wind        25
##  2       NA pressure  1013
##  3       NA wind        25
##  4       NA pressure  1013
##  5       NA wind        25
##  6       NA pressure  1013
##  7       NA wind        25
##  8       NA pressure  1013
##  9       NA wind        25
## 10       NA pressure  1012
## # ℹ 39,064 more rows

##task 3.2 (unite)

# Task 3.2: build `date_id` by joining year, month and day with "-".
# remove = FALSE keeps the three source columns alongside the new one.
# The parts are pasted as-is, so months and days are not zero-padded.
storms_with_date <- unite(
  storms,
  col = "date_id",
  c(year, month, day),
  sep = "-",
  remove = FALSE
)
storms_with_date
## # A tibble: 19,537 × 14
##    name  date_id    year month   day  hour   lat  long status     category  wind
##    <chr> <chr>     <dbl> <dbl> <int> <dbl> <dbl> <dbl> <fct>         <dbl> <int>
##  1 Amy   1975-6-27  1975     6    27     0  27.5 -79   tropical …       NA    25
##  2 Amy   1975-6-27  1975     6    27     6  28.5 -79   tropical …       NA    25
##  3 Amy   1975-6-27  1975     6    27    12  29.5 -79   tropical …       NA    25
##  4 Amy   1975-6-27  1975     6    27    18  30.5 -79   tropical …       NA    25
##  5 Amy   1975-6-28  1975     6    28     0  31.5 -78.8 tropical …       NA    25
##  6 Amy   1975-6-28  1975     6    28     6  32.4 -78.7 tropical …       NA    25
##  7 Amy   1975-6-28  1975     6    28    12  33.3 -78   tropical …       NA    25
##  8 Amy   1975-6-28  1975     6    28    18  34   -77   tropical …       NA    30
##  9 Amy   1975-6-29  1975     6    29     0  34.4 -75.8 tropical …       NA    35
## 10 Amy   1975-6-29  1975     6    29     6  34   -74.8 tropical …       NA    40
## # ℹ 19,527 more rows
## # ℹ 3 more variables: pressure <int>, tropicalstorm_force_diameter <int>,
## #   hurricane_force_diameter <int>

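##part 4 (interpretation) ##According to the results of Task 2.3, the most intense storm activity occurred in the year that showed the highest maximum wind speed and the lowest minimum pressure among hurricanes from 2010 onward. Since higher wind speed and lower pressure indicate more severe storms, this combination points to stronger hurricanes. Within the rows printed above, 2010 stands out, with Hurricane Igor reaching a maximum wind speed of 135 knots and a minimum pressure of 924 millibars. Because the table is sorted by year first and only the first 10 of its 102 rows are displayed, the remaining years should also be inspected (for example, by sorting on max_wind alone) before concluding that 2010 was the most intense year overall.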