# Import data
library(readr)
# Location of the tornado CSV. The default is the original hard-coded local
# path; set the TORNADO_CSV environment variable to read the file from another
# location without editing this script. An unset or empty variable falls back
# to the default.
tornado_csv <- Sys.getenv("TORNADO_CSV", unset = "")
if (!nzchar(tornado_csv)) {
  tornado_csv <- "/Users/andygarza/Downloads/us_tornado_dataset_1950_2021.csv"
}
# `data` is read by the later state-count, bar-chart, and heatmap steps.
data <- read_csv(tornado_csv)
## Rows: 67558 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): st
## dbl (12): yr, mo, dy, mag, inj, fat, slat, slon, elat, elon, len, wid
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Tally tornadoes per state: one row per value of `st`, with the number of
# rows for that state stored in `tornado_count`, ordered from most to fewest.
state_counts <- count(data, st, name = "tornado_count", sort = TRUE)
# Show the per-state tally
print(state_counts)
## # A tibble: 53 × 2
## st tornado_count
## <chr> <int>
## 1 TX 9149
## 2 KS 4375
## 3 OK 4092
## 4 FL 3497
## 5 NE 2967
## 6 IA 2773
## 7 IL 2682
## 8 MS 2476
## 9 MO 2427
## 10 AL 2358
## # ℹ 43 more rows
# Report the state with the highest tornado count. state_counts is sorted in
# descending order of tornado_count, so its first entry is the maximum.
state_with_most_tornados <- state_counts[["st"]][1]
max_tornado_count <- state_counts[["tornado_count"]][1]
cat(
  "State with the highest tornado count:", state_with_most_tornados,
  "with", max_tornado_count, "tornadoes.\n"
)
## State with the highest tornado count: TX with 9149 tornadoes.
library(dplyr)
# Filter to display top 10 states.
# state_counts was already built from `data` and sorted in descending order of
# tornado_count earlier in this script, and `data` has not changed since, so it
# is reused here rather than recomputed.
# Here top_10 is a character vector of state codes; a later step reassigns
# top_10 to a data frame that also carries the counts.
top_10 <- state_counts %>%
  slice_head(n = 10) %>%
  pull(st)
library(dplyr)
library(ggplot2)
# Filter the top 10 states.
# state_counts (tornadoes per state, sorted in descending order of
# tornado_count) was computed earlier from the same unmodified `data`, so it is
# reused here instead of being rebuilt. The result is converted to a plain
# data frame, as before.
top_10 <- state_counts %>%
  slice_head(n = 10) %>%
  as.data.frame()
# Create visual: one bar per state, ordered from the highest count to the
# lowest. geom_col() draws bar heights straight from the y values, which is
# what geom_bar(stat = "identity") did.
ggplot(top_10, aes(x = reorder(st, -tornado_count), y = tornado_count)) +
  geom_col(fill = "blue") +
  labs(
    title = "Tornado Count by State",
    x = "State",
    y = "Number of Tornadoes"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

library(dplyr)
library(ggplot2)
# Keep only the rows whose state appears in top_10
top_10_states <- top_10[["st"]]
filtered_data <- filter(data, st %in% top_10_states)
# Number of tornadoes for each state/year pair, stored in `tornado_count`
# (ungrouped result, one row per pair that occurs in the filtered data)
heatmap_data <- count(filtered_data, st, yr, name = "tornado_count")
# Heatmap: years along x, states along y, tile colour running from white
# (low count) to red (high count)
ggplot(heatmap_data, aes(x = yr, y = st, fill = tornado_count)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "red") +
  labs(title = "Heatmap of Tornado Counts for Top 10 States", x = "Year", y = "State", fill = "Tornado Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
