The goal of this notebook is to summarize the weather data
Summarizing with math:
New explorations:
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load the processed weather data saved by an earlier step and display it.
weather <- read_rds(file = "data-processed/01-weather.rds")

# Leave the object as a bare expression so the notebook renders it.
weather
Creating yr, mo, and yday columns
# Add calendar helper columns derived from `date`:
#   yr   - year of the observation
#   mo   - month as a labelled (ordered) factor, e.g. "Aug"
#   yday - day of the year
weather_yr <- mutate(
  weather,
  yr = year(date),
  mo = month(date, label = TRUE),
  yday = yday(date)
)

# Quick structural check of the augmented data.
glimpse(weather_yr)
## Rows: 30,579
## Columns: 11
## $ station <chr> "USW00013960", "USW00013960", "USW00013960", "USW00013960", "U…
## $ name <chr> "DALLAS FAA AIRPORT, TX US", "DALLAS FAA AIRPORT, TX US", "DAL…
## $ date <date> 1939-08-01, 1939-08-02, 1939-08-03, 1939-08-04, 1939-08-05, 1…
## $ prcp <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.03, 0.00, 0.00, 0.…
## $ snow <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ snwd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tmax <dbl> 96, 94, 97, 100, 99, 96, 99, 85, 96, 98, 98, 99, 102, 102, 100…
## $ tmin <dbl> 74, 75, 77, 77, 73, 73, 77, 79, 74, 73, 78, 78, 77, 76, 77, 76…
## $ yr <dbl> 1939, 1939, 1939, 1939, 1939, 1939, 1939, 1939, 1939, 1939, 19…
## $ mo <ord> Aug, Aug, Aug, Aug, Aug, Aug, Aug, Aug, Aug, Aug, Aug, Aug, Au…
## $ yday <dbl> 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 22…
Finding which days had the most rain and how much
# Days with 5 or more inches of precipitation, wettest first.
# Filtering before sorting gives the same rows in the same order.
weather_yr %>%
  filter(prcp >= 5) %>%
  select(date, prcp) %>%
  arrange(desc(prcp))
Finding the hottest and coldest days in history
# Hottest days on record: daily highs (`tmax`) of 110 or more, hottest first.
weather_yr %>%
  filter(tmax >= 110) %>%
  select(date, tmax) %>%
  arrange(desc(tmax))

# Coldest days on record: daily lows (`tmin`) of 7 or less, coldest first.
weather_yr %>%
  filter(tmin <= 7) %>%
  select(date, tmin) %>%
  arrange(tmin)
Finding the year with the most days of tmax 100+
# Count the days per year with a high of 100 or more, then keep only the
# years with at least 35 such days, most days first.
weather_yr %>%
  filter(tmax >= 100) %>%
  count(yr, name = "days") %>%
  filter(days >= 35) %>%
  arrange(desc(days))
Finding how many days each year had snow
# Number of days per year with any recorded snowfall (`snow` > 0),
# most recent year first. Years with no snow days do not appear.
weather_yr %>%
  filter(snow > 0) %>%
  count(yr, name = "snow_days") %>%
  arrange(desc(yr))
Finding the number of days each year where the tmin is 32 or below
# Number of days per year whose low (`tmin`) was 32 or below,
# most recent year first.
weather_yr %>%
  filter(tmin <= 32) %>%
  count(yr, name = "days") %>%
  arrange(desc(yr))
Finding the number of days where the tmax is not above 32
# Number of days per year whose high (`tmax`) never rose above 32,
# most recent year first.
weather_yr %>%
  filter(tmax <= 32) %>%
  count(yr, name = "days") %>%
  arrange(desc(yr))
Number of days (per year) with 100+ in the month of May
Finding the number of days each year in May where tmax was 100+
# Days per year in May (month number 5) with a high of 100 or more,
# most recent year first. Years without such a day are omitted.
weather_yr %>%
  filter(tmax >= 100 & month(date) == 5) %>%
  count(yr, name = "days") %>%
  arrange(desc(yr))
Finding the number of days where the tmax was 100+ in June each year
# Days per year in June (month number 6) with a high of 100 or more,
# most recent year first. Years without such a day are omitted.
weather_yr %>%
  filter(tmax >= 100 & month(date) == 6) %>%
  count(yr, name = "days") %>%
  arrange(desc(yr))
Finding the number of days where the tmax was 100+ in July each year
# Days per year in July (month number 7) with a high of 100 or more,
# most recent year first. Years without such a day are omitted.
weather_yr %>%
  filter(tmax >= 100 & month(date) == 7) %>%
  count(yr, name = "days") %>%
  arrange(desc(yr))
Finding rainfall amount each year and arranging by greatest amount and least amount
# Wettest years: total precipitation of 47 inches or more, wettest first.
# 1939 and 2023 are left out; the data starts on 1939-08-01, so 1939 is a
# partial year (2023 is presumably partial as well - confirm).
weather_yr %>%
  filter(yr > 1939, yr < 2023) %>%
  group_by(yr) %>%
  summarise(rain_amount = sum(prcp)) %>%
  filter(rain_amount >= 47) %>%
  arrange(desc(rain_amount))

# Driest years: total precipitation of 25 inches or less, driest first,
# over the same 1940-2022 window.
weather_yr %>%
  filter(yr > 1939, yr < 2023) %>%
  group_by(yr) %>%
  summarise(rain_amount = sum(prcp)) %>%
  filter(rain_amount <= 25) %>%
  arrange(rain_amount)
Finding how much snowfall occurred in each year
# Total snowfall per year for 1940-2022, keeping only years whose total
# is 2 or more, snowiest first.
weather_yr %>%
  filter(yr > 1939, yr < 2023) %>%
  group_by(yr) %>%
  summarise(snowfall_amount = sum(snow)) %>%
  filter(snowfall_amount >= 2) %>%
  arrange(desc(snowfall_amount))
Finding the average rainfall for each month over all the years
# Average monthly rainfall across all years.
# Step 1 totals precipitation for every (month, year) pair. summarise()
# drops the last grouping level, so the result is still grouped by `mo`.
# Step 2 therefore averages those yearly totals within each month.
weather_yr %>%
  group_by(mo, yr) %>%
  summarise(total_rainfall = sum(prcp)) %>%
  summarise(average_rainfall = mean(total_rainfall))
## `summarise()` has grouped output by 'mo'. You can override using the `.groups`
## argument.
What is the earliest date in each year with 100+ temperature? Which year had the earliest date?
Finding the first day each year where tmax was 100+, and which year that happened first
# Earliest date in each year with a high of 100 or more. Sorting by day of
# year puts the year with the earliest such day at the top.
weather_yr %>%
  filter(tmax >= 100) %>%
  select(date, tmax, tmin, yr, mo, yday) %>%
  group_by(yr) %>%
  slice_min(order_by = date, n = 1) %>%
  arrange(yday)
Finding the first day each year after July 1 that was freezing, and which year that happened first
# First freezing day (low of 32 or below) in the second half of each year,
# i.e. from July onward. `mo` is an ordered factor, so it can be compared
# against a month label. Sorted so the earliest day of year comes first.
weather_yr %>%
  filter(tmin <= 32, mo >= "Jul") %>%
  select(date, tmax, tmin, yr, mo, yday) %>%
  group_by(yr) %>%
  slice_min(order_by = date, n = 1) %>%
  arrange(yday)

# Same query for the first half of each year (months before July): the
# earliest freezing day per year, sorted by day of year.
# NOTE(review): this returns the *first* freeze before July. If the goal
# was the last freeze of the season, slice_max() would be needed - confirm.
weather_yr %>%
  filter(tmin <= 32, mo < "Jul") %>%
  select(date, tmax, tmin, yr, mo, yday) %>%
  group_by(yr) %>%
  slice_min(order_by = date, n = 1) %>%
  arrange(yday)
Finding the year with the highest average high temperature of all time
# Years (1940-2022) whose mean daily high (`tmax`) was 78 or more,
# warmest first.
weather_yr %>%
  filter(yr > 1939, yr < 2023) %>%
  group_by(yr) %>%
  summarise(average_high = mean(tmax)) %>%
  filter(average_high >= 78) %>%
  arrange(desc(average_high))
Finding the year with the lowest average low temperature of all time
# Years (1940-2022) whose mean daily low (`tmin`) was 55 or less,
# coldest first.
weather_yr %>%
  filter(yr > 1939, yr < 2023) %>%
  group_by(yr) %>%
  summarise(average_low = mean(tmin)) %>%
  filter(average_low <= 55) %>%
  arrange(average_low)
Finding, for each month, the year in which it had its highest average high temperature of all time
# For each calendar month, the year in which that month had its highest
# mean daily high. The summary stays grouped by `mo`, so slice_max()
# picks the top year within each month.
weather_yr %>%
  group_by(mo, yr) %>%
  summarise(average_high = mean(tmax)) %>%
  slice_max(order_by = average_high, n = 1)
## `summarise()` has grouped output by 'mo'. You can override using the `.groups`
## argument.
Finding, for each month, the year in which it had its lowest average low temperature of all time
# For each calendar month, the year in which that month had its lowest
# mean daily low (`tmin`).
# The summary stays grouped by `mo`, so the slice runs within each month.
# slice_min() is required here: slice_max() would return the year with the
# warmest average low, the opposite of what this step is meant to find.
weather_yr %>%
  group_by(mo, yr) %>%
  summarize(average_low = mean(tmin)) %>%
  slice_min(average_low)
## `summarise()` has grouped output by 'mo'. You can override using the `.groups`
## argument.
68.02 inches of rain fell in 2015, over ten inches more than the next closest year, according to an analysis of Dallas FAA Airport weather data from 1940 to 2022, provided by the National Centers for Environmental Information.