Prompt

Historic data indicates that the occurrence and intensity of cyclonic storms (Hurricanes, Typhoons and Tornadoes) increases with the increased earth temperature. For this assignment you will need to tell this story to a non-technical audience (eg: a high-school earth science class).

R Packages

library(tidyverse)
library(httr)
library(jsonlite)
library(dplyr)
library(ggplot2)
library(reshape2)
library(usmap)
library(tinytex)

Data Import

url <- "https://www.nhc.noaa.gov/data/hurdat/hurdat2-1851-2025-02272026.txt"
url1 <- "https://www.nhc.noaa.gov/data/hurdat/hurdat2-nepac-1949-2025-02272026.txt"
url2 <- "https://raw.githubusercontent.com/Stevee-G/Data608/refs/heads/main/Assignment5/nasa_temps.csv"

out_file <- "hurdat2_atlantic.txt"
download.file(url, out_file, mode = "wb")
raw <- readLines(out_file)

parsed <- list()
i <- 1
while (i <= length(raw)) {
  header <- trimws(strsplit(raw[i], ",")[[1]])
  track_id <- header[1]
  track_name <- header[2]
  rows <- as.numeric(header[3])

  body <- raw[(i + 1):(i + rows)]
  body <- strsplit(body, ",")
  body <- lapply(body, trimws)
  body <- lapply(body, function(x) x[1:20])  # keep first 20 fields
  body <- do.call(rbind, body)

  colnames(body) <- c(
    "date","time","record","status","lat","lon","max_speed","min_press",
    "34kt_ne","34kt_se","34kt_sw","34kt_nw",
    "50kt_ne","50kt_se","50kt_sw","50kt_nw",
    "64kt_ne","64kt_se","64kt_sw","64kt_nw"
  )

  body <- as_tibble(body)
  body$lat <- ifelse(grepl("N", body$lat),
                     gsub("N","", body$lat),
                     paste0("-", gsub("S","", body$lat)))
  body$lon <- ifelse(grepl("E", body$lon),
                     gsub("E","", body$lon),
                     paste0("-", gsub("W","", body$lon)))

  parsed[[length(parsed) + 1]] <-
    bind_cols(tibble(track_id = track_id, track_name = track_name), body)

  i <- i + rows + 1
}

atlantic_df <- bind_rows(parsed)
head(atlantic_df)
## # A tibble: 6 × 22
##   track_id track_name date   time  record status lat   lon   max_speed min_press
##   <chr>    <chr>      <chr>  <chr> <chr>  <chr>  <chr> <chr> <chr>     <chr>    
## 1 AL011851 UNNAMED    18510… 0000  ""     HU     28.0  -94.8 80        -999     
## 2 AL011851 UNNAMED    18510… 0600  ""     HU     28.0  -95.4 80        -999     
## 3 AL011851 UNNAMED    18510… 1200  ""     HU     28.0  -96.0 80        -999     
## 4 AL011851 UNNAMED    18510… 1800  ""     HU     28.1  -96.5 80        -999     
## 5 AL011851 UNNAMED    18510… 2100  "L"    HU     28.2  -96.8 80        -999     
## 6 AL011851 UNNAMED    18510… 0000  ""     HU     28.2  -97.0 70        -999     
## # ℹ 12 more variables: `34kt_ne` <chr>, `34kt_se` <chr>, `34kt_sw` <chr>,
## #   `34kt_nw` <chr>, `50kt_ne` <chr>, `50kt_se` <chr>, `50kt_sw` <chr>,
## #   `50kt_nw` <chr>, `64kt_ne` <chr>, `64kt_se` <chr>, `64kt_sw` <chr>,
## #   `64kt_nw` <chr>
out_file1 <- "hurdat2_pacific.txt"
download.file(url1, out_file1, mode = "wb")
raw1 <- readLines(out_file1)

parsed1 <- list()
j <- 1
while (j <= length(raw1)) {
  header <- trimws(strsplit(raw1[j], ",")[[1]])
  track_id <- header[1]
  track_name <- header[2]
  rows <- as.numeric(header[3])

  body <- raw1[(j + 1):(j + rows)]
  body <- strsplit(body, ",")
  body <- lapply(body, trimws)
  body <- lapply(body, function(x) x[1:20])  # keep first 20 fields
  body <- do.call(rbind, body)

  colnames(body) <- c(
    "date","time","record","status","lat","lon","max_speed","min_press",
    "34kt_ne","34kt_se","34kt_sw","34kt_nw",
    "50kt_ne","50kt_se","50kt_sw","50kt_nw",
    "64kt_ne","64kt_se","64kt_sw","64kt_nw"
  )

  body <- as_tibble(body)
  body$lat <- ifelse(grepl("N", body$lat),
                     gsub("N","", body$lat),
                     paste0("-", gsub("S","", body$lat)))
  body$lon <- ifelse(grepl("E", body$lon),
                     gsub("E","", body$lon),
                     paste0("-", gsub("W","", body$lon)))

  parsed1[[length(parsed1) + 1]] <-
    bind_cols(tibble(track_id = track_id, track_name = track_name), body)

  j <- j + rows + 1
}

pacific_df <- bind_rows(parsed1)
head(pacific_df)
## # A tibble: 6 × 22
##   track_id track_name date   time  record status lat   lon   max_speed min_press
##   <chr>    <chr>      <chr>  <chr> <chr>  <chr>  <chr> <chr> <chr>     <chr>    
## 1 EP011949 UNNAMED    19490… 0000  ""     TS     20.2  -106… 45        -999     
## 2 EP011949 UNNAMED    19490… 0600  ""     TS     20.2  -106… 45        -999     
## 3 EP011949 UNNAMED    19490… 1200  ""     TS     20.2  -106… 45        -999     
## 4 EP011949 UNNAMED    19490… 1800  ""     TS     20.3  -107… 45        -999     
## 5 EP011949 UNNAMED    19490… 0000  ""     TS     20.4  -108… 45        -999     
## 6 EP011949 UNNAMED    19490… 0600  ""     TS     20.5  -109… 45        -999     
## # ℹ 12 more variables: `34kt_ne` <chr>, `34kt_se` <chr>, `34kt_sw` <chr>,
## #   `34kt_nw` <chr>, `50kt_ne` <chr>, `50kt_se` <chr>, `50kt_sw` <chr>,
## #   `50kt_nw` <chr>, `64kt_ne` <chr>, `64kt_se` <chr>, `64kt_sw` <chr>,
## #   `64kt_nw` <chr>
temp_df <- read_csv(url2)
glimpse(temp_df)
## Rows: 146
## Columns: 3
## $ year      <dbl> 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, …
## $ mean      <dbl> -0.18, -0.09, -0.11, -0.17, -0.28, -0.34, -0.32, -0.37, -0.1…
## $ smoothing <dbl> -0.10, -0.13, -0.17, -0.21, -0.24, -0.27, -0.28, -0.28, -0.2…

Data Wrangling

atl_trim <- atlantic_df %>% 
  mutate(year = format(as.Date(atlantic_df$date,
                               format = "%Y%m%d"), "%Y")) %>%
  arrange(desc(max_speed)) %>% 
  distinct(track_id, .keep_all = TRUE) %>% 
  arrange(year, track_id) %>% 
  select(year, track_id, max_speed, status) %>% 
  filter(year >= 1950)
glimpse(atl_trim)
## Rows: 1,198
## Columns: 4
## $ year      <chr> "1950", "1950", "1950", "1950", "1950", "1950", "1950", "195…
## $ track_id  <chr> "AL011950", "AL021950", "AL031950", "AL041950", "AL051950", …
## $ max_speed <chr> "95", "90", "95", "90", "95", "95", "95", "40", "90", "90", …
## $ status    <chr> "HU", "HU", "HU", "HU", "HU", "HU", "HU", "TS", "HU", "HU", …
pa_trim <- pacific_df %>% 
  mutate(year = format(as.Date(pacific_df$date,
                               format = "%Y%m%d"), "%Y")) %>%
  arrange(desc(max_speed)) %>% 
  distinct(track_id, .keep_all = TRUE) %>% 
  arrange(year, track_id) %>% 
  select(year, track_id, max_speed, status) %>% 
  filter(year >= 1950)
glimpse(pa_trim)
## Rows: 1,256
## Columns: 4
## $ year      <chr> "1950", "1950", "1950", "1950", "1950", "1950", "1950", "195…
## $ track_id  <chr> "CP011950", "EP011950", "EP021950", "EP031950", "EP041950", …
## $ max_speed <chr> "75", "75", "75", "75", "45", "75", "75", "45", "75", "45", …
## $ status    <chr> "HU", "HU", "HU", "HU", "TS", "HU", "HU", "TS", "HU", "TS", …
storms <- bind_rows(atl_trim, pa_trim) %>% 
  mutate(year = as.integer(year),
         max_speed = as.integer(max_speed)) %>% 
  arrange(year, track_id)
glimpse(storms)
## Rows: 2,454
## Columns: 4
## $ year      <int> 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, …
## $ track_id  <chr> "AL011950", "AL021950", "AL031950", "AL041950", "AL051950", …
## $ max_speed <int> 95, 90, 95, 90, 95, 95, 95, 40, 90, 90, 90, 60, 70, 40, 45, …
## $ status    <chr> "HU", "HU", "HU", "HU", "HU", "HU", "HU", "TS", "HU", "HU", …
temp_trim <- temp_df %>% 
  mutate(temperature = smoothing) %>% 
  filter(year >= 1950) %>% 
  select(year, temperature)
glimpse(temp_trim)
## Rows: 76
## Columns: 2
## $ year        <dbl> 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959…
## $ temperature <dbl> -0.08, -0.07, -0.07, -0.07, -0.07, -0.06, -0.05, -0.04, -0…
all <- full_join(storms, temp_trim,
                        join_by(year == year),
                        relationship = "many-to-one")
glimpse(all)
## Rows: 2,454
## Columns: 5
## $ year        <dbl> 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950, 1950…
## $ track_id    <chr> "AL011950", "AL021950", "AL031950", "AL041950", "AL051950"…
## $ max_speed   <int> 95, 90, 95, 90, 95, 95, 95, 40, 90, 90, 90, 60, 70, 40, 45…
## $ status      <chr> "HU", "HU", "HU", "HU", "HU", "HU", "HU", "TS", "HU", "HU"…
## $ temperature <dbl> -0.08, -0.08, -0.08, -0.08, -0.08, -0.08, -0.08, -0.08, -0…
all_trim <- all %>% 
  group_by(year,temperature) %>% 
  summarize(
    n_storms = n(),
    n_hurricane = sum(status == "HU"),
    p_hurricane = round(n_hurricane/n_storms,2),
    avg_speed = round(mean(max_speed),2)
  )
glimpse(all_trim)
## Rows: 76
## Columns: 6
## Groups: year [76]
## $ year        <dbl> 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959…
## $ temperature <dbl> -0.08, -0.07, -0.07, -0.07, -0.07, -0.06, -0.05, -0.04, -0…
## $ n_storms    <int> 23, 21, 18, 18, 26, 20, 23, 21, 25, 29, 15, 21, 16, 18, 18…
## $ n_hurricane <int> 17, 10, 8, 9, 10, 12, 11, 11, 12, 12, 9, 10, 6, 11, 8, 5, …
## $ p_hurricane <dbl> 0.74, 0.48, 0.44, 0.50, 0.38, 0.60, 0.48, 0.52, 0.48, 0.41…
## $ avg_speed   <dbl> 75.43, 63.81, 64.72, 64.17, 60.96, 70.75, 61.96, 65.62, 66…

Data Visualizations

ggplot(all_trim, aes(x = year, y = n_storms, fill = n_storms)) +
  geom_col() +
  scale_fill_gradient2(low = "whitesmoke", mid = "darkgrey" , high = "darkblue",
                         midpoint=median(all_trim$n_storms)) +
  labs(title = "Bar Plot: Storm Activity Since 1950",
       x = "Year",
       y = "") +
  theme_minimal() +
  theme(legend.position="none",
        plot.title = element_text(hjust = 0.5))

ggplot(all_trim, aes(x = year, y = n_hurricane, fill = n_hurricane)) +
  geom_col() +
  scale_fill_gradient2(low = "violet", mid = "darkviolet" , high = "darkblue",
                         midpoint=median(all_trim$n_hurricane)) +
  labs(title = "Bar Plot: Hurricane Activity",
       x = "Year",
       y = "") +
  theme_minimal() +
  theme(legend.position="none",
        plot.title = element_text(hjust = 0.5))

ggplot(all_trim, aes(x = year, y = p_hurricane, color = p_hurricane)) +
  geom_line(linewidth = 2) +
  scale_colour_gradient2(low = "violet", mid = "darkviolet" , high = "darkblue",
                         midpoint=median(all_trim$p_hurricane)) +
  labs(title = "Line Plot: Proportion of Hurricanes per Storm Season",
       x = "Year",
       y = "Proportion") +
  theme_minimal() +
  theme(legend.position="none",
        plot.title = element_text(hjust = 0.5))

ggplot(all_trim, aes(x = year, y = avg_speed, color = avg_speed)) +
  geom_line(linewidth = 2) +
  scale_colour_gradient2(low = "whitesmoke", mid = "darkgrey" , high = "darkblue",
                         midpoint=median(all_trim$avg_speed)) +
  labs(title = "Line Plot: Average Speed per Storm Season",
       x = "Year",
       y = "Speed") +
  theme_minimal() +
  theme(legend.position="none",
        plot.title = element_text(hjust = 0.5))

ggplot(all_trim, aes(x = year, y = temperature, color = temperature)) +
  geom_line(linewidth = 2) +
  scale_colour_gradient2(low = "yellow", mid = "orange" , high = "red",
                         midpoint=median(all_trim$temperature)) +
  labs(title = "Line Plot: Rise in Global Temperature Over the Years",
       x = "Year",
       y = "Temp (C)") +
  theme_minimal() +
  theme(legend.position="none",
        plot.title = element_text(hjust = 0.5))

ggplot(all_trim, aes(x=temperature, y=n_storms)) +
  geom_point() +
  labs(title = "Scatterplot: Temperature vs Number of Storms",
       x = "Temp (C)",
       y = "Storms") +
  geom_smooth(color = "darkblue") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(all_trim, aes(x=temperature, y=p_hurricane)) +
  geom_point() +
  labs(title = "Scatterplot: Temperature vs Proportion of Hurricanes",
       x = "Temp (C)",
       y = "Hurricane Proportion") +
  geom_smooth(color = "darkviolet") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(all_trim, aes(x=temperature, y=avg_speed)) +
  geom_point() +
  labs(title = "Scatterplot: Temperature vs Storm Speed",
       x = "Temp (C)",
       y = "Speed") +
  geom_smooth(color = "darkgrey") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

Sources

Storm Data:

https://www.nhc.noaa.gov/data/#hurdat

Climate Data:

https://science.nasa.gov/earth/explore/earth-indicators/global-temperature/