library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.1
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.1
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.1
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Bind the `storms` dataset (made available by library(tidyverse)) to `data`.
# `<-` is the conventional assignment operator in R; `=` is reserved for
# naming arguments in calls.
data <- storms
# Compact, one-line-per-column preview: column name, type, first values.
glimpse(data)
## Rows: 19,537
## Columns: 13
## $ name <chr> "Amy", "Amy", "Amy", "Amy", "Amy", "Amy",…
## $ year <dbl> 1975, 1975, 1975, 1975, 1975, 1975, 1975,…
## $ month <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ day <int> 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 2…
## $ hour <dbl> 0, 6, 12, 18, 0, 6, 12, 18, 0, 6, 12, 18,…
## $ lat <dbl> 27.5, 28.5, 29.5, 30.5, 31.5, 32.4, 33.3,…
## $ long <dbl> -79.0, -79.0, -79.0, -79.0, -78.8, -78.7,…
## $ status <fct> tropical depression, tropical depression,…
## $ category <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wind <int> 25, 25, 25, 25, 25, 25, 25, 30, 35, 40, 4…
## $ pressure <int> 1013, 1013, 1013, 1013, 1012, 1012, 1011,…
## $ tropicalstorm_force_diameter <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ hurricane_force_diameter <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Base-R structure listing of the same table, to cross-check the column
# types reported by glimpse().
utils::str(data)
## tibble [19,537 × 13] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:19537] "Amy" "Amy" "Amy" "Amy" ...
## $ year : num [1:19537] 1975 1975 1975 1975 1975 ...
## $ month : num [1:19537] 6 6 6 6 6 6 6 6 6 6 ...
## $ day : int [1:19537] 27 27 27 27 28 28 28 28 29 29 ...
## $ hour : num [1:19537] 0 6 12 18 0 6 12 18 0 6 ...
## $ lat : num [1:19537] 27.5 28.5 29.5 30.5 31.5 32.4 33.3 34 34.4 34 ...
## $ long : num [1:19537] -79 -79 -79 -79 -78.8 -78.7 -78 -77 -75.8 -74.8 ...
## $ status : Factor w/ 9 levels "disturbance",..: 7 7 7 7 7 7 7 7 8 8 ...
## $ category : num [1:19537] NA NA NA NA NA NA NA NA NA NA ...
## $ wind : int [1:19537] 25 25 25 25 25 25 25 30 35 40 ...
## $ pressure : int [1:19537] 1013 1013 1013 1013 1012 1012 1011 1006 1004 1002 ...
## $ tropicalstorm_force_diameter: int [1:19537] NA NA NA NA NA NA NA NA NA NA ...
## $ hurricane_force_diameter : int [1:19537] NA NA NA NA NA NA NA NA NA NA ...
## The storms dataset contains 19,537 rows and 13 columns. Each row represents
## a single recorded observation of a storm at a specific date and time,
## including information such as its name, location, wind speed, pressure, and
## status. Two potential data quality issues are the presence of missing values
## (NA) in the category, tropicalstorm_force_diameter, and
## hurricane_force_diameter variables, as shown in the output above, and the
## fact that a single storm changes status over time, so analyses must filter
## on status carefully.
# Keep only observations whose status is "hurricane" and whose year is
# later than 2000, then narrow the table to the storm name, the date
# components, and the two intensity columns.
hurricane_subset <- storms %>%
  filter(
    year > 2000,
    status == "hurricane"
  ) %>%
  select(
    name,
    year, month, day,
    category, wind
  )
# Auto-print the result for inspection.
hurricane_subset
# Append `wind_kph`, the wind column scaled by 1.852. That is the
# km/h-per-knot factor, so this presumes `wind` is recorded in knots
# (confirm against ?storms).
hurricane_subset <- mutate(hurricane_subset, wind_kph = 1.852 * wind)
# Auto-print the augmented table.
hurricane_subset
# Peak intensity per storm for hurricane-status observations from 2010
# onward: one row per (year, name) holding the highest wind and the lowest
# pressure seen for that storm in that year.
storm_of_the_century <- storms %>%
  filter(status == "hurricane", year >= 2010) %>%
  group_by(year, name) %>%
  summarise(
    max_wind = max(wind, na.rm = TRUE),
    min_pressure = min(pressure, na.rm = TRUE),
    # Return an ungrouped tibble so the sort below is global.
    .groups = "drop"
  ) %>%
  # Chronological by year; within a year, strongest max wind first.
  arrange(year, desc(max_wind))
# Auto-print the summary.
storm_of_the_century
# Reshape to long format: each observation contributes two rows, one for
# wind and one for pressure. `metric` names the measurement, `value` holds
# the reading, and `category` is carried along as the identifier column.
storm_long <- storms %>%
  select(category, wind, pressure) %>%
  pivot_longer(
    # After the select() above, everything except `category` is exactly
    # wind and pressure, in that order.
    cols = !category,
    names_to = "metric",
    values_to = "value"
  )
# Auto-print the long table.
storm_long
# Add a `date_id` label built by pasting year, month, and day together with
# "-" separators. The values are pasted as-is (no zero padding), so the
# result is a character label rather than a Date.
storms_with_date <- storms %>%
  unite(
    col = "date_id",
    c(year, month, day),
    sep = "-",
    # Keep the original year/month/day columns alongside the new label.
    remove = FALSE
  )
# Auto-print the table with the new column.
storms_with_date
## Based on the results from Task 2.3, the year with the most intense storm
## activity is the one that recorded the highest maximum wind speed together
## with the lowest minimum pressure among hurricanes since 2010. This
## combination indicates stronger, more intense storms, because higher wind
## speeds and lower central pressure are key indicators of hurricane severity.
## The summarised data show that 2010 stands out compared with the other
## years: Hurricane Igor (2010) reached a max_wind of 135 and a min_pressure
## of 924.