##task 1.1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'tibble' was built under R version 4.5.1
## Warning: package 'tidyr' was built under R version 4.5.2
## Warning: package 'readr' was built under R version 4.5.1
## Warning: package 'purrr' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.1
## Warning: package 'stringr' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.2
## Warning: package 'lubridate' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Bind the `storms` tibble (available after library(tidyverse)) to `data`.
# `<-` is the idiomatic top-level assignment operator in R; `=` is best kept
# for naming arguments inside calls.
data <- storms
# Compact column-by-column overview: name, type and the first few values.
glimpse(data)
## Rows: 19,537
## Columns: 13
## $ name <chr> "Amy", "Amy", "Amy", "Amy", "Amy", "Amy",…
## $ year <dbl> 1975, 1975, 1975, 1975, 1975, 1975, 1975,…
## $ month <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ day <int> 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 2…
## $ hour <dbl> 0, 6, 12, 18, 0, 6, 12, 18, 0, 6, 12, 18,…
## $ lat <dbl> 27.5, 28.5, 29.5, 30.5, 31.5, 32.4, 33.3,…
## $ long <dbl> -79.0, -79.0, -79.0, -79.0, -78.8, -78.7,…
## $ status <fct> tropical depression, tropical depression,…
## $ category <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wind <int> 25, 25, 25, 25, 25, 25, 25, 30, 35, 40, 4…
## $ pressure <int> 1013, 1013, 1013, 1013, 1012, 1012, 1011,…
## $ tropicalstorm_force_diameter <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ hurricane_force_diameter <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Base R structure dump of the same object: its class, dimensions, and the
# storage type of every column (complements the glimpse() overview).
str(data)
## tibble [19,537 × 13] (S3: tbl_df/tbl/data.frame)
## $ name : chr [1:19537] "Amy" "Amy" "Amy" "Amy" ...
## $ year : num [1:19537] 1975 1975 1975 1975 1975 ...
## $ month : num [1:19537] 6 6 6 6 6 6 6 6 6 6 ...
## $ day : int [1:19537] 27 27 27 27 28 28 28 28 29 29 ...
## $ hour : num [1:19537] 0 6 12 18 0 6 12 18 0 6 ...
## $ lat : num [1:19537] 27.5 28.5 29.5 30.5 31.5 32.4 33.3 34 34.4 34 ...
## $ long : num [1:19537] -79 -79 -79 -79 -78.8 -78.7 -78 -77 -75.8 -74.8 ...
## $ status : Factor w/ 9 levels "disturbance",..: 7 7 7 7 7 7 7 7 8 8 ...
## $ category : num [1:19537] NA NA NA NA NA NA NA NA NA NA ...
## $ wind : int [1:19537] 25 25 25 25 25 25 25 30 35 40 ...
## $ pressure : int [1:19537] 1013 1013 1013 1013 1012 1012 1011 1006 1004 1002 ...
## $ tropicalstorm_force_diameter: int [1:19537] NA NA NA NA NA NA NA NA NA NA ...
## $ hurricane_force_diameter : int [1:19537] NA NA NA NA NA NA NA NA NA NA ...
##task 1.2
##The storms dataset contains 19,537 rows and 13 columns. Each row represents a single recorded observation of a storm at a specific date and time, including information such as its name, location, wind speed, pressure, and status.
##Two potential data quality issues are the presence of missing values (the output above shows NA in category, tropicalstorm_force_diameter, and hurricane_force_diameter) and the fact that some storms change status over time, which requires careful filtering during analysis.
#task 2.1 (filtering and selecting)
# Keep only observations whose status is "hurricane" and whose year is
# strictly after 2000, then retain the identifying columns plus the
# storm category and wind speed.
hurricane_subset <- storms |>
  filter(year > 2000 & status == "hurricane") |>
  select(
    name,
    year, month, day,
    category, wind
  )

print(hurricane_subset)
## # A tibble: 2,506 × 6
## name year month day category wind
## <chr> <dbl> <dbl> <int> <dbl> <int>
## 1 Erin 2001 9 9 1 75
## 2 Erin 2001 9 9 2 90
## 3 Erin 2001 9 9 2 95
## 4 Erin 2001 9 9 3 105
## 5 Erin 2001 9 10 3 105
## 6 Erin 2001 9 10 3 105
## 7 Erin 2001 9 10 3 100
## 8 Erin 2001 9 10 2 90
## 9 Erin 2001 9 11 1 80
## 10 Erin 2001 9 11 1 80
## # ℹ 2,496 more rows
#task 2.2 (mutating)
# Append `wind_kph`, the wind speed scaled by 1.852
# (presumably knots -> km/h; confirm the unit of `wind` in the dataset docs).
hurricane_subset <- mutate(hurricane_subset, wind_kph = 1.852 * wind)

print(hurricane_subset)
## # A tibble: 2,506 × 7
## name year month day category wind wind_kph
## <chr> <dbl> <dbl> <int> <dbl> <int> <dbl>
## 1 Erin 2001 9 9 1 75 139.
## 2 Erin 2001 9 9 2 90 167.
## 3 Erin 2001 9 9 2 95 176.
## 4 Erin 2001 9 9 3 105 194.
## 5 Erin 2001 9 10 3 105 194.
## 6 Erin 2001 9 10 3 105 194.
## 7 Erin 2001 9 10 3 100 185.
## 8 Erin 2001 9 10 2 90 167.
## 9 Erin 2001 9 11 1 80 148.
## 10 Erin 2001 9 11 1 80 148.
## # ℹ 2,496 more rows
#task 2.3 (summarizing)
# Per-storm intensity summary for hurricane-status observations from 2010 on.
# One output row per (year, name) pair holding that storm's peak wind and
# lowest pressure; rows are ordered by year, strongest peak wind first.
storm_of_the_century <- storms |>
  filter(status == "hurricane" & year >= 2010) |>
  group_by(year, name) |>
  summarise(
    max_wind     = max(wind, na.rm = TRUE),
    min_pressure = min(pressure, na.rm = TRUE),
    # Return an ungrouped tibble so arrange() sorts the whole table.
    .groups = "drop"
  ) |>
  arrange(year, desc(max_wind))

print(storm_of_the_century)
## # A tibble: 102 × 4
## year name max_wind min_pressure
## <dbl> <chr> <int> <int>
## 1 2010 Igor 135 924
## 2 2010 Earl 125 927
## 3 2010 Julia 120 948
## 4 2010 Danielle 115 942
## 5 2010 Karl 110 956
## 6 2010 Alex 95 946
## 7 2010 Paula 90 981
## 8 2010 Richard 85 977
## 9 2010 Tomas 85 982
## 10 2010 Lisa 75 982
## # ℹ 92 more rows
##task 3.1 (pivoting)
# Reshape to long format: for every observation, `wind` and `pressure`
# become two rows distinguished by `metric`, with the measurement in `value`.
# `category` is carried along unchanged as the identifying column.
storm_long <- storms |>
  select(category, wind, pressure) |>
  pivot_longer(
    cols      = !category,  # i.e. wind and pressure, the only other columns
    names_to  = "metric",
    values_to = "value"
  )

print(storm_long)
## # A tibble: 39,074 × 3
## category metric value
## <dbl> <chr> <int>
## 1 NA wind 25
## 2 NA pressure 1013
## 3 NA wind 25
## 4 NA pressure 1013
## 5 NA wind 25
## 6 NA pressure 1013
## 7 NA wind 25
## 8 NA pressure 1013
## 9 NA wind 25
## 10 NA pressure 1012
## # ℹ 39,064 more rows
##task 3.2 (unite)
# Build a character key `date_id` by pasting year, month and day together
# with "-" (e.g. "1975-6-27"; components are not zero-padded).
# remove = FALSE keeps the original year/month/day columns next to the key.
storms_with_date <- storms |>
  unite("date_id", year, month, day, sep = "-", remove = FALSE)

# Print explicitly, like the other tasks: a bare object name at top level
# shows nothing when the script is run through source().
print(storms_with_date)
#part 4 (interpretation)
##Based on the results from Task 2.3, the year with the most intense storm activity is the year that recorded the highest maximum wind speed along with the lowest minimum pressure among hurricanes since 2010. This combination indicates stronger and more intense storms, as higher wind speeds and lower pressure are key indicators of hurricane severity. The summarized data show that 2010 stands out compared to the other years: Igor (2010) reached a maximum wind of 135 with a minimum pressure of 924.