Historic data indicates that the occurrence and intensity of cyclonic storms (Hurricanes, Typhoons and Tornadoes) increases with the increased earth temperature. For this assignment you will need to tell this story to a non-technical audience (eg: a high-school earth science class).
library(tidyverse)
library(httr)
library(jsonlite)
library(dplyr)
library(ggplot2)
library(reshape2)
library(usmap)
library(tinytex)
url <- "https://www.nhc.noaa.gov/data/hurdat/hurdat2-1851-2025-02272026.txt"
url1 <- "https://www.nhc.noaa.gov/data/hurdat/hurdat2-nepac-1949-2025-02272026.txt"
url2 <- "https://raw.githubusercontent.com/Stevee-G/Data608/refs/heads/main/Assignment5/nasa_temps.csv"
out_file <- "hurdat2_atlantic.txt"
download.file(url, out_file, mode = "wb")
raw <- readLines(out_file)
parsed <- list()
i <- 1
while (i <= length(raw)) {
header <- trimws(strsplit(raw[i], ",")[[1]])
track_id <- header[1]
track_name <- header[2]
rows <- as.numeric(header[3])
body <- raw[(i + 1):(i + rows)]
body <- strsplit(body, ",")
body <- lapply(body, trimws)
body <- lapply(body, function(x) x[1:20]) # keep first 20 fields
body <- do.call(rbind, body)
colnames(body) <- c(
"date","time","record","status","lat","lon","max_speed","min_press",
"34kt_ne","34kt_se","34kt_sw","34kt_nw",
"50kt_ne","50kt_se","50kt_sw","50kt_nw",
"64kt_ne","64kt_se","64kt_sw","64kt_nw"
)
body <- as_tibble(body)
body$lat <- ifelse(grepl("N", body$lat),
gsub("N","", body$lat),
paste0("-", gsub("S","", body$lat)))
body$lon <- ifelse(grepl("E", body$lon),
gsub("E","", body$lon),
paste0("-", gsub("W","", body$lon)))
parsed[[length(parsed) + 1]] <-
bind_cols(tibble(track_id = track_id, track_name = track_name), body)
i <- i + rows + 1
}
atlantic_df <- bind_rows(parsed)
head(atlantic_df)
## # A tibble: 6 × 22
## track_id track_name date time record status lat lon max_speed min_press
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 AL011851 UNNAMED 18510… 0000 "" HU 28.0 -94.8 80 -999
## 2 AL011851 UNNAMED 18510… 0600 "" HU 28.0 -95.4 80 -999
## 3 AL011851 UNNAMED 18510… 1200 "" HU 28.0 -96.0 80 -999
## 4 AL011851 UNNAMED 18510… 1800 "" HU 28.1 -96.5 80 -999
## 5 AL011851 UNNAMED 18510… 2100 "L" HU 28.2 -96.8 80 -999
## 6 AL011851 UNNAMED 18510… 0000 "" HU 28.2 -97.0 70 -999
## # ℹ 12 more variables: `34kt_ne` <chr>, `34kt_se` <chr>, `34kt_sw` <chr>,
## # `34kt_nw` <chr>, `50kt_ne` <chr>, `50kt_se` <chr>, `50kt_sw` <chr>,
## # `50kt_nw` <chr>, `64kt_ne` <chr>, `64kt_se` <chr>, `64kt_sw` <chr>,
## # `64kt_nw` <chr>
out_file1 <- "hurdat2_pacific.txt"
download.file(url1, out_file1, mode = "wb")
raw1 <- readLines(out_file1)
parsed1 <- list()
j <- 1
while (j <= length(raw1)) {
header <- trimws(strsplit(raw1[j], ",")[[1]])
track_id <- header[1]
track_name <- header[2]
rows <- as.numeric(header[3])
body <- raw1[(j + 1):(j + rows)]
body <- strsplit(body, ",")
body <- lapply(body, trimws)
body <- lapply(body, function(x) x[1:20]) # keep first 20 fields
body <- do.call(rbind, body)
colnames(body) <- c(
"date","time","record","status","lat","lon","max_speed","min_press",
"34kt_ne","34kt_se","34kt_sw","34kt_nw",
"50kt_ne","50kt_se","50kt_sw","50kt_nw",
"64kt_ne","64kt_se","64kt_sw","64kt_nw"
)
body <- as_tibble(body)
body$lat <- ifelse(grepl("N", body$lat),
gsub("N","", body$lat),
paste0("-", gsub("S","", body$lat)))
body$lon <- ifelse(grepl("E", body$lon),
gsub("E","", body$lon),
paste0("-", gsub("W","", body$lon)))
parsed1[[length(parsed1) + 1]] <-
bind_cols(tibble(track_id = track_id, track_name = track_name), body)
j <- j + rows + 1
}
pacific_df <- bind_rows(parsed1)
head(pacific_df)
## # A tibble: 6 × 22
## track_id track_name date time record status lat lon max_speed min_press
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 EP011949 UNNAMED 19490… 0000 "" TS 20.2 -106… 45 -999
## 2 EP011949 UNNAMED 19490… 0600 "" TS 20.2 -106… 45 -999
## 3 EP011949 UNNAMED 19490… 1200 "" TS 20.2 -106… 45 -999
## 4 EP011949 UNNAMED 19490… 1800 "" TS 20.3 -107… 45 -999
## 5 EP011949 UNNAMED 19490… 0000 "" TS 20.4 -108… 45 -999
## 6 EP011949 UNNAMED 19490… 0600 "" TS 20.5 -109… 45 -999
## # ℹ 12 more variables: `34kt_ne` <chr>, `34kt_se` <chr>, `34kt_sw` <chr>,
## # `34kt_nw` <chr>, `50kt_ne` <chr>, `50kt_se` <chr>, `50kt_sw` <chr>,
## # `50kt_nw` <chr>, `64kt_ne` <chr>, `64kt_se` <chr>, `64kt_sw` <chr>,
## # `64kt_nw` <chr>
temp_df <- read_csv(url2)
glimpse(temp_df)
## Rows: 146
## Columns: 3
## $ year <dbl> 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, …
## $ mean <dbl> -0.18, -0.09, -0.11, -0.17, -0.28, -0.34, -0.32, -0.37, -0.1…
## $ smoothing <dbl> -0.10, -0.13, -0.17, -0.21, -0.24, -0.27, -0.28, -0.28, -0.2…
atl_trim <- atlantic_df %>%
mutate(year = format(as.Date(atlantic_df$date,
format = "%Y%m%d"), "%Y")) %>%
arrange(desc(max_speed)) %>%
distinct(track_id, .keep_all = TRUE) %>%
arrange(year, track_id) %>%
select(year, track_id, max_speed, status) %>%
filter(year >= 1950)
glimpse(atl_trim)
## Rows: 1,198
## Columns: 4
## $ year <chr> "1950", "1950", "1950", "1950", "1950", "1950", "1950", "195…
## $ track_id <chr> "AL011950", "AL021950", "AL031950", "AL041950", "AL051950", …
## $ max_speed <chr> "95", "90", "95", "90", "95", "95", "95", "40", "90", "90", …
## $ status <chr> "HU", "HU", "HU", "HU", "HU", "HU", "HU", "TS", "HU", "HU", …
pa_trim <- pacific_df %>%
mutate(year = format(as.Date(pacific_df$date,
format = "%Y%m%d"), "%Y")) %>%
arrange(desc(max_speed)) %>%
distinct(track_id, .keep_all = TRUE) %>%
arrange(year, track_id) %>%
select(year, track_id, max_speed, status) %>%
filter(year >= 1950)
glimpse(pa_trim)
## Rows: 1,256
## Columns: 4
## $ year <chr> "1950", "1950", "1950", "1950", "1950", "1950", "1950", "195…
## $ track_id <chr> "CP011950", "EP011950", "EP021950", "EP031950", "EP041950", …
## $ max_speed <chr> "75", "75", "75", "75", "45", "75", "75", "45", "75", "45", …
## $ status <chr> "HU", "HU", "HU", "HU", "TS", "HU", "HU", "TS", "HU", "TS", …
storms <- bind_rows(atl_trim, pa_trim) %>%
mutate(year = as.integer(year),
max_speed = as.integer(max_speed)) %>%
arrange(year, track_id)
glimpse(storms)
## Rows: 2,454
## Columns: 4
## $ year <int> 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, …
## $ track_id <chr> "AL011950", "AL021950", "AL031950", "AL041950", "AL051950", …
## $ max_speed <int> 95, 90, 95, 90, 95, 95, 95, 40, 90, 90, 90, 60, 70, 40, 45, …
## $ status <chr> "HU", "HU", "HU", "HU", "HU", "HU", "HU", "TS", "HU", "HU", …
temp_trim <- temp_df %>%
mutate(temperature = smoothing) %>%
filter(year >= 1950) %>%
select(year, temperature)
glimpse(temp_trim)
## Rows: 76
## Columns: 2
## $ year <dbl> 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959…
## $ temperature <dbl> -0.08, -0.07, -0.07, -0.07, -0.07, -0.06, -0.05, -0.04, -0…
all <- full_join(storms, temp_trim,
join_by(year == year),
relationship = "many-to-one")
glimpse(all)
## Rows: 2,454
## Columns: 5
## $ year <dbl> 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950…
## $ track_id <chr> "AL011950", "AL021950", "AL031950", "AL041950", "AL051950"…
## $ max_speed <int> 95, 90, 95, 90, 95, 95, 95, 40, 90, 90, 90, 60, 70, 40, 45…
## $ status <chr> "HU", "HU", "HU", "HU", "HU", "HU", "HU", "TS", "HU", "HU"…
## $ temperature <dbl> -0.08, -0.08, -0.08, -0.08, -0.08, -0.08, -0.08, -0.08, -0…
all_trim <- all %>%
group_by(year,temperature) %>%
summarize(
n_storms = n(),
n_hurricane = sum(status == "HU"),
p_hurricane = round(n_hurricane/n_storms,2),
avg_speed = round(mean(max_speed),2)
)
glimpse(all_trim)
## Rows: 76
## Columns: 6
## Groups: year [76]
## $ year <dbl> 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959…
## $ temperature <dbl> -0.08, -0.07, -0.07, -0.07, -0.07, -0.06, -0.05, -0.04, -0…
## $ n_storms <int> 23, 21, 18, 18, 26, 20, 23, 21, 25, 29, 15, 21, 16, 18, 18…
## $ n_hurricane <int> 17, 10, 8, 9, 10, 12, 11, 11, 12, 12, 9, 10, 6, 11, 8, 5, …
## $ p_hurricane <dbl> 0.74, 0.48, 0.44, 0.50, 0.38, 0.60, 0.48, 0.52, 0.48, 0.41…
## $ avg_speed <dbl> 75.43, 63.81, 64.72, 64.17, 60.96, 70.75, 61.96, 65.62, 66…
ggplot(all_trim, aes(x = year, y = n_storms, fill = n_storms)) +
geom_col() +
scale_fill_gradient2(low = "whitesmoke", mid = "darkgrey" , high = "darkblue",
midpoint=median(all_trim$n_storms)) +
labs(title = "Bar Plot: Storm Activity Since 1950",
x = "Year",
y = "") +
theme_minimal() +
theme(legend.position="none",
plot.title = element_text(hjust = 0.5))
ggplot(all_trim, aes(x = year, y = n_hurricane, fill = n_hurricane)) +
geom_col() +
scale_fill_gradient2(low = "violet", mid = "darkviolet" , high = "darkblue",
midpoint=median(all_trim$n_hurricane)) +
labs(title = "Bar Plot: Hurricane Activity",
x = "Year",
y = "") +
theme_minimal() +
theme(legend.position="none",
plot.title = element_text(hjust = 0.5))
ggplot(all_trim, aes(x = year, y = p_hurricane, color = p_hurricane)) +
geom_line(linewidth = 2) +
scale_colour_gradient2(low = "violet", mid = "darkviolet" , high = "darkblue",
midpoint=median(all_trim$p_hurricane)) +
labs(title = "Line Plot: Proportion of Hurricanes per Storm Season",
x = "Year",
y = "Proportion") +
theme_minimal() +
theme(legend.position="none",
plot.title = element_text(hjust = 0.5))
ggplot(all_trim, aes(x = year, y = avg_speed, color = avg_speed)) +
geom_line(linewidth = 2) +
scale_colour_gradient2(low = "whitesmoke", mid = "darkgrey" , high = "darkblue",
midpoint=median(all_trim$avg_speed)) +
labs(title = "Line Plot: Average Speed per Storm Season",
x = "Year",
y = "Speed") +
theme_minimal() +
theme(legend.position="none",
plot.title = element_text(hjust = 0.5))
ggplot(all_trim, aes(x = year, y = temperature, color = temperature)) +
geom_line(linewidth = 2) +
scale_colour_gradient2(low = "yellow", mid = "orange" , high = "red",
midpoint=median(all_trim$temperature)) +
labs(title = "Line Plot: Rise in Global Temperature Over the Years",
x = "Year",
y = "Temp (C)") +
theme_minimal() +
theme(legend.position="none",
plot.title = element_text(hjust = 0.5))
ggplot(all_trim, aes(x=temperature, y=n_storms)) +
geom_point() +
labs(title = "Scatterplot: Temperature vs Number of Storms",
x = "Temp (C)",
y = "Storms") +
geom_smooth(color = "darkblue") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
ggplot(all_trim, aes(x=temperature, y=p_hurricane)) +
geom_point() +
labs(title = "Scatterplot: Temperature vs Proportion of Hurricanes",
x = "Temp (C)",
y = "Hurricane Proportion") +
geom_smooth(color = "darkviolet") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
ggplot(all_trim, aes(x=temperature, y=avg_speed)) +
geom_point() +
labs(title = "Scatterplot: Temperature vs Storm Speed",
x = "Temp (C)",
y = "Speed") +
geom_smooth(color = "darkgrey") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
Storm Data:
https://www.nhc.noaa.gov/data/#hurdat
Climate Data:
https://science.nasa.gov/earth/explore/earth-indicators/global-temperature/