##task 1.1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.1
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.1
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.1
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Work on a copy of the `storms` tibble (made available by library(tidyverse))
# under the name `data`, then preview every column with its type and first
# few values.
data <- storms
glimpse(data)
## Rows: 19,537
## Columns: 13
## $ name <chr> "Amy", "Amy", "Amy", "Amy", "Amy", "Amy",…
## $ year <dbl> 1975, 1975, 1975, 1975, 1975, 1975, 1975,…
## $ month <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ day <int> 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 2…
## $ hour <dbl> 0, 6, 12, 18, 0, 6, 12, 18, 0, 6, 12, 18,…
## $ lat <dbl> 27.5, 28.5, 29.5, 30.5, 31.5, 32.4, 33.3,…
## $ long <dbl> -79.0, -79.0, -79.0, -79.0, -78.8, -78.7,…
## $ status <fct> tropical depression, tropical depression,…
## $ category <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wind <int> 25, 25, 25, 25, 25, 25, 25, 30, 35, 40, 4…
## $ pressure <int> 1013, 1013, 1013, 1013, 1012, 1012, 1011,…
## $ tropicalstorm_force_diameter <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ hurricane_force_diameter <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Compact structure dump of the same table: dimensions, column classes and
# the leading values of each column.
utils::str(data)
## tibble [19,537 × 13] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:19537] "Amy" "Amy" "Amy" "Amy" ...
## $ year : num [1:19537] 1975 1975 1975 1975 1975 ...
## $ month : num [1:19537] 6 6 6 6 6 6 6 6 6 6 ...
## $ day : int [1:19537] 27 27 27 27 28 28 28 28 29 29 ...
## $ hour : num [1:19537] 0 6 12 18 0 6 12 18 0 6 ...
## $ lat : num [1:19537] 27.5 28.5 29.5 30.5 31.5 32.4 33.3 34 34.4 34 ...
## $ long : num [1:19537] -79 -79 -79 -79 -78.8 -78.7 -78 -77 -75.8 -74.8 ...
## $ status : Factor w/ 9 levels "disturbance",..: 7 7 7 7 7 7 7 7 8 8 ...
## $ category : num [1:19537] NA NA NA NA NA NA NA NA NA NA ...
## $ wind : int [1:19537] 25 25 25 25 25 25 25 30 35 40 ...
## $ pressure : int [1:19537] 1013 1013 1013 1013 1012 1012 1011 1006 1004 1002 ...
## $ tropicalstorm_force_diameter: int [1:19537] NA NA NA NA NA NA NA NA NA NA ...
## $ hurricane_force_diameter : int [1:19537] NA NA NA NA NA NA NA NA NA NA ...
##task 1.2
##The storms dataset contains 19,537 observations across 13 variables. Each record corresponds to a storm at a particular date and time, providing details such as the storm’s name, position, wind speed, pressure, and status.
##Some data quality concerns include missing values (NA) in the category, tropicalstorm_force_diameter, and hurricane_force_diameter fields, and storms that change status over time, which must be handled carefully during analysis.
##task 2.1 (filtering and selecting)
# Keep only observations whose status is "hurricane" and whose year is later
# than 2000, then reduce the table to the storm name, the date parts
# (year, month, day), the category and the wind speed.
hurricane_subset <- storms %>%
  filter(year > 2000, status == "hurricane") %>%
  select(name, year:day, category, wind)
print(hurricane_subset)
## # A tibble: 2,506 × 6
## name year month day category wind
## <chr> <dbl> <dbl> <int> <dbl> <int>
## 1 Erin 2001 9 9 1 75
## 2 Erin 2001 9 9 2 90
## 3 Erin 2001 9 9 2 95
## 4 Erin 2001 9 9 3 105
## 5 Erin 2001 9 10 3 105
## 6 Erin 2001 9 10 3 105
## 7 Erin 2001 9 10 3 100
## 8 Erin 2001 9 10 2 90
## 9 Erin 2001 9 11 1 80
## 10 Erin 2001 9 11 1 80
## # ℹ 2,496 more rows
#task 2.2 (mutating)
# Append `wind_kph`, the wind speed multiplied by 1.852 (the knots -> km/h
# factor; assumes `wind` is recorded in knots).
hurricane_subset <- mutate(hurricane_subset, wind_kph = 1.852 * wind)
print(hurricane_subset)
## # A tibble: 2,506 × 7
## name year month day category wind wind_kph
## <chr> <dbl> <dbl> <int> <dbl> <int> <dbl>
## 1 Erin 2001 9 9 1 75 139.
## 2 Erin 2001 9 9 2 90 167.
## 3 Erin 2001 9 9 2 95 176.
## 4 Erin 2001 9 9 3 105 194.
## 5 Erin 2001 9 10 3 105 194.
## 6 Erin 2001 9 10 3 105 194.
## 7 Erin 2001 9 10 3 100 185.
## 8 Erin 2001 9 10 2 90 167.
## 9 Erin 2001 9 11 1 80 148.
## 10 Erin 2001 9 11 1 80 148.
## # ℹ 2,496 more rows
#task 2.3 (summarizing)
# Per-storm intensity summary for hurricane-status observations from 2010
# onward: one row per (year, name) with the peak wind and the lowest pressure.
# Rows are ordered by year, strongest storm first within each year, so the
# printed preview only shows the earliest year.
storm_of_the_century <- storms %>%
  filter(status == "hurricane") %>%
  filter(year >= 2010) %>%
  group_by(year, name) %>%
  summarise(
    max_wind = max(wind, na.rm = TRUE),
    min_pressure = min(pressure, na.rm = TRUE),
    .groups = "drop" # return an ungrouped tibble
  ) %>%
  arrange(year, desc(max_wind))
print(storm_of_the_century)
## # A tibble: 102 × 4
## year name max_wind min_pressure
## <dbl> <chr> <int> <int>
## 1 2010 Igor 135 924
## 2 2010 Earl 125 927
## 3 2010 Julia 120 948
## 4 2010 Danielle 115 942
## 5 2010 Karl 110 956
## 6 2010 Alex 95 946
## 7 2010 Paula 90 981
## 8 2010 Richard 85 977
## 9 2010 Tomas 85 982
## 10 2010 Lisa 75 982
## # ℹ 92 more rows
##task 3.1 (pivoting)
# Reshape to long format: keep `category` as the identifier and stack the
# `wind` and `pressure` readings into a metric/value pair, giving two rows
# per original observation.
storm_long <- storms %>%
  select(category, wind, pressure) %>%
  pivot_longer(
    cols = -category, # every selected column except the identifier
    names_to = "metric",
    values_to = "value"
  )
print(storm_long)
## # A tibble: 39,074 × 3
## category metric value
## <dbl> <chr> <int>
## 1 NA wind 25
## 2 NA pressure 1013
## 3 NA wind 25
## 4 NA pressure 1013
## 5 NA wind 25
## 6 NA pressure 1013
## 7 NA wind 25
## 8 NA pressure 1013
## 9 NA wind 25
## 10 NA pressure 1012
## # ℹ 39,064 more rows
##task 3.2 (unite)
# Build a character `date_id` column by joining year, month and day with "-"
# (e.g. "1975-6-27"; parts are not zero-padded and the result is not a Date).
# `remove = FALSE` keeps the original year/month/day columns alongside it.
storms_with_date <- storms %>%
  unite(date_id, year, month, day, sep = "-", remove = FALSE)
# Print explicitly, like the other tasks: a bare top-level expression is not
# shown when the script is run through a plain source() call.
print(storms_with_date)
## # A tibble: 19,537 × 14
## name date_id year month day hour lat long status category wind
## <chr> <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <fct> <dbl> <int>
## 1 Amy 1975-6-27 1975 6 27 0 27.5 -79 tropical … NA 25
## 2 Amy 1975-6-27 1975 6 27 6 28.5 -79 tropical … NA 25
## 3 Amy 1975-6-27 1975 6 27 12 29.5 -79 tropical … NA 25
## 4 Amy 1975-6-27 1975 6 27 18 30.5 -79 tropical … NA 25
## 5 Amy 1975-6-28 1975 6 28 0 31.5 -78.8 tropical … NA 25
## 6 Amy 1975-6-28 1975 6 28 6 32.4 -78.7 tropical … NA 25
## 7 Amy 1975-6-28 1975 6 28 12 33.3 -78 tropical … NA 25
## 8 Amy 1975-6-28 1975 6 28 18 34 -77 tropical … NA 30
## 9 Amy 1975-6-29 1975 6 29 0 34.4 -75.8 tropical … NA 35
## 10 Amy 1975-6-29 1975 6 29 6 34 -74.8 tropical … NA 40
## # ℹ 19,527 more rows
## # ℹ 3 more variables: pressure <int>, tropicalstorm_force_diameter <int>,
## # hurricane_force_diameter <int>
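##part 4 (interpretation)
##According to the results of Task 2.3, the most intense storm activity is found in the year that shows the highest maximum wind speed and the lowest minimum pressure among hurricanes from 2010 onward (the filter is year >= 2010). Since higher wind speed and lower pressure indicate more severe storms, this combination points to stronger hurricanes. In the rows printed above, Hurricane Igor (2010) reaches a maximum wind speed of 135 knots and a minimum pressure of 924 millibars, the strongest values among the 2010 hurricanes. Because the summary is sorted by year first, the printed preview only covers 2010; the remaining 92 rows must be inspected (for example by sorting on desc(max_wind)) before concluding which year stands out overall.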