Goals of this Notebook

The goal of this notebook is to summarize the weather data

Additional quests

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(lubridate)
## Loading required package: timechange
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Importing clean data

# Read the processed weather data from its RDS file
weather <- read_rds("data-processed/01-weather.rds")

# Print a column-by-column overview of what was loaded
glimpse(weather)
## Rows: 30,914
## Columns: 8
## $ station <chr> "USW00013958", "USW00013958", "USW00013958", "USW00013958", "U…
## $ name    <chr> "AUSTIN CAMP MABRY, TX US", "AUSTIN CAMP MABRY, TX US", "AUSTI…
## $ date    <date> 1938-06-01, 1938-06-02, 1938-06-03, 1938-06-04, 1938-06-05, 1…
## $ prcp    <dbl> 0.00, 0.00, 0.00, 0.40, 0.02, 0.00, 0.00, 0.00, 1.60, 0.01, 0.…
## $ snow    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ snwd    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tmax    <dbl> 91, 94, 94, 90, 94, 92, 95, 92, 87, 90, 92, 91, 91, 91, 89, 89…
## $ tmin    <dbl> 72, 67, 70, 68, 68, 70, 70, 76, 64, 76, 75, 71, 70, 68, 71, 70…

Creating yr column

Creating a column for the year

# Add a `yr` column holding the calendar year pulled out of each date
weather_yr <- mutate(weather, yr = year(date))

# Check that the new column is present
glimpse(weather_yr)
## Rows: 30,914
## Columns: 9
## $ station <chr> "USW00013958", "USW00013958", "USW00013958", "USW00013958", "U…
## $ name    <chr> "AUSTIN CAMP MABRY, TX US", "AUSTIN CAMP MABRY, TX US", "AUSTI…
## $ date    <date> 1938-06-01, 1938-06-02, 1938-06-03, 1938-06-04, 1938-06-05, 1…
## $ prcp    <dbl> 0.00, 0.00, 0.00, 0.40, 0.02, 0.00, 0.00, 0.00, 1.60, 0.01, 0.…
## $ snow    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ snwd    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tmax    <dbl> 91, 94, 94, 90, 94, 92, 95, 92, 87, 90, 92, 91, 91, 91, 89, 89…
## $ tmin    <dbl> 72, 67, 70, 68, 68, 70, 70, 76, 64, 76, 75, 71, 70, 68, 71, 70…
## $ yr      <dbl> 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 19…

Days with the most rain

Finding which days had the most rain and how much rain they had

# Ten largest daily precipitation readings, largest first
weather_yr %>%
  distinct(date, prcp) %>% # one row per date/precipitation pair, two columns only
  arrange(desc(prcp)) %>% # highest precipitation at the top
  slice_head(n = 10) # limit the listing to the first ten rows

Hottest and coldest days in history

Finding the hottest and coldest days in history

# Hottest days: highs of 108 degrees or more, warmest first
weather_yr %>%
  filter(tmax >= 108) %>%
  distinct(date, tmax) %>% # only the date and its high temperature
  arrange(desc(tmax))

# Coldest days: lows of 12 degrees or less, coldest first
weather_yr %>%
  filter(tmin <= 12) %>%
  distinct(date, tmin) %>% # only the date and its low temperature
  arrange(tmin)

Years with the most days of 100+ temperature

Finding the years with the most days of tmax 100+

# Count the 100+ degree days in each year and keep the leading years
weather_100 <- weather_yr %>%
  filter(tmax >= 100) %>% # only days whose high reached 100 or more
  count(yr, name = "days") %>% # one row per year with its number of such days
  filter(days >= 42) %>% # only years with at least 42 of those days
  arrange(desc(days)) %>% # years with the most 100+ days first
  slice_head(n = 5) # no more than five years

weather_100

Number of days per year with snow

Finding the number of days that had snow each year

# Number of days in each year with snowfall above zero, newest year first
weather_yr %>%
  filter(snow > 0) %>%
  count(yr, name = "snow_days") %>% # one row per year that had any such day
  arrange(desc(yr))

Number of days per year below freezing

Finding the number of days each year where the tmin is at or below 32 (freezing)

# Number of days per year whose low temperature was 32 degrees or lower
weather_yr %>%
  filter(tmin <= 32) %>% # keep days with a low of 32 or less
  count(yr, name = "days") %>% # how many such days each year had
  arrange(desc(yr)) # most recent year first

Number of days per year not above freezing

Finding the number of days each year where the tmax is at or below 32 (the high never got above freezing)

# Number of days per year whose high temperature was 32 degrees or lower
weather_yr %>%
  filter(tmax <= 32) %>% # keep days with a high of 32 or less
  count(yr, name = "days") %>% # how many such days each year had
  arrange(desc(yr)) # most recent year first

Number of days 100+ in May per year

Finding the number of days where the tmax was 100+ in May each year

# Number of May days per year with a high of 100 degrees or more
weather_yr %>%
  filter(tmax >= 100 & month(date) == 5) %>% # 100+ highs that fall in May
  count(yr, name = "days") %>% # how many such days each year had
  arrange(desc(yr)) # most recent year first

Number of days 100+ in June per year

Finding the number of days where the tmax was 100+ in June each year

# Number of June days per year with a high of 100 degrees or more
weather_yr %>%
  filter(tmax >= 100 & month(date) == 6) %>% # 100+ highs that fall in June
  count(yr, name = "days") %>% # how many such days each year had
  arrange(desc(yr)) # most recent year first

Number of days 100+ in July per year

Finding the number of days where the tmax was 100+ in July each year

# Number of July days per year with a high of 100 degrees or more
weather_yr %>%
  filter(tmax >= 100 & month(date) == 7) %>% # 100+ highs that fall in July
  count(yr, name = "days") %>% # how many such days each year had
  arrange(desc(yr)) # most recent year first

Goals of summarizing with math

Required quests

Additional quests

Creating mo and yday columns

Creating columns for the month (mo) and the day of the year (yday)

# Add the month label and the day-of-year number in a single step
weather_mo <- weather_yr %>%
  mutate(
    mo = month(date, label = TRUE), # month as a labelled, ordered factor
    yday = yday(date) # position of the date within its year, as a number
  )

glimpse(weather_mo)
## Rows: 30,914
## Columns: 11
## $ station <chr> "USW00013958", "USW00013958", "USW00013958", "USW00013958", "U…
## $ name    <chr> "AUSTIN CAMP MABRY, TX US", "AUSTIN CAMP MABRY, TX US", "AUSTI…
## $ date    <date> 1938-06-01, 1938-06-02, 1938-06-03, 1938-06-04, 1938-06-05, 1…
## $ prcp    <dbl> 0.00, 0.00, 0.00, 0.40, 0.02, 0.00, 0.00, 0.00, 1.60, 0.01, 0.…
## $ snow    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ snwd    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tmax    <dbl> 91, 94, 94, 90, 94, 92, 95, 92, 87, 90, 92, 91, 91, 91, 89, 89…
## $ tmin    <dbl> 72, 67, 70, 68, 68, 70, 70, 76, 64, 76, 75, 71, 70, 68, 71, 70…
## $ yr      <dbl> 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 19…
## $ mo      <ord> Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Ju…
## $ yday    <dbl> 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 16…

Which years had the most total rainfall and how much? Which had the least?

Finding rainfall amount each year and arranging by greatest amount and least amount

# Wettest years: total precipitation above 40, excluding 1938 and 2023,
# largest total first
weather_mo %>%
  group_by(yr) %>%
  summarize(rain_amount = sum(prcp)) %>% # add up each year's precipitation
  filter(yr > 1938 & yr < 2023 & rain_amount > 40) %>%
  arrange(desc(rain_amount))

# Driest years: total precipitation below 25, excluding 1938 and 2023,
# smallest total first
weather_mo %>%
  group_by(yr) %>%
  summarize(rain_amount = sum(prcp)) %>% # add up each year's precipitation
  filter(yr > 1938 & yr < 2023 & rain_amount < 25) %>%
  arrange(rain_amount)

Which years had the most total snowfall and how much?

Finding how much snowfall occurred in each year

# Snowiest years: total snowfall above 1, excluding 1938 and 2023,
# largest total first
weather_mo %>%
  group_by(yr) %>%
  summarize(snowfall_amount = sum(snow)) %>% # add up each year's snowfall
  filter(yr > 1938 & yr < 2023 & snowfall_amount > 1) %>%
  arrange(desc(snowfall_amount)) # most snow at the top

What is the average rainfall for each month across time?

Finding the average rainfall for each month over all the years

# Average rainfall for each calendar month across all years:
# first total the rain within every month of every year, then average those
# monthly totals for each month.
weather_rainfall <- weather_mo %>%
  group_by(mo, yr) %>% # one group per month of each year
  # `.groups = "drop"` returns an ungrouped result explicitly instead of relying
  # on summarize() implicitly keeping the `mo` grouping and printing a message
  summarize(total_rainfall = sum(prcp), .groups = "drop") %>%
  group_by(mo) %>% # regroup by month only
  summarize(average_rainfall = mean(total_rainfall)) # mean of the monthly totals

weather_rainfall

What is the earliest date in each year with 100+ temperature? Which year had the earliest date?

Finding the first day each year where tmax was 100+, and which year that happened first

# First 100+ degree day of each year; the year that got there earliest is on top
weather_mo %>%
  filter(tmax >= 100) %>% # only days whose high reached 100 or more
  group_by(yr) %>%
  slice_min(date) %>% # earliest remaining date within each year
  select(date, tmax, tmin, yr, mo, yday) %>%
  arrange(yday) # smallest day-of-year first

What is the earliest day (but after July 1) for a freeze each year? Which year had the earliest date?

Finding the first day each year after July 1 that was freezing, and which year that happened first

# First freeze from July onward in each year; the earliest one is on top
weather_mo %>%
  filter(tmin <= 32 & mo >= "Jul") %>% # lows of 32 or less, July or later
  group_by(yr) %>%
  slice_min(date) %>% # earliest remaining date within each year
  select(date, tmax, tmin, yr, mo, yday) %>%
  arrange(yday) # smallest day-of-year first

What is the latest date (but before July 1) for a freeze each year? Which year had the latest date?

# Last freeze of the first half of each year: among days before July with a low
# of 32 degrees or less, keep the latest date in each year, then list the years
# so the one with the latest such date comes first.
weather_mo %>%
  group_by(yr) %>%
  filter(
    tmin <= 32, # days with a low of 32 or less
    mo < "Jul" # only months before July
  ) %>%
  slice_max(date) %>% # takes the latest date value
  select(date, tmax, tmin, yr, mo, yday) %>%
  arrange(desc(yday)) # largest day-of-year (latest freeze) first

Goals of visualizations

Most 100+ days plot

Plotting the top years with the most 100+ days

# Horizontal bar chart of the years in `weather_100`, ordered by number of days
ggplot(
  data = weather_100,
  mapping = aes(x = reorder(yr, days), y = days) # years sorted by their day count
) +
  geom_col() + # one bar per year
  geom_text(aes(label = days), hjust = 2, color = "white") + # day count as text
  coord_flip() + # turn the bars sideways
  labs(
    x = "Year",
    y = "Number of Days",
    title = "Top Five Years With the Most 100+ Degree Days in Austin from 1938-2023",
    subtitle = str_wrap("Most 100+ days each year from the National Centers for Environmental Information's Austin Camp Mabry data"),
    caption = "By Shezan Samanani"
  )

Average rainfall by month plot

Plotting the average rainfall by month

# Bar chart of the average rainfall for each month
ggplot(
  data = weather_rainfall,
  mapping = aes(x = mo, y = average_rainfall) # month across, average rainfall up
) +
  geom_col() + # one bar per month
  labs(
    x = "Month",
    y = "Average Rainfall Amount (inches)",
    title = "Average Rainfall by Month in Austin from 1938-2023",
    subtitle = str_wrap("Average rainfall amount in each month from the National Centers for Environmental Information's Austin Camp Mabry data"),
    caption = "By Shezan Samanani"
  )

Yearly average high and low temperature

Finding the yearly average high and low temperatures and pivoting long

# Mean daily high and low for each year, excluding 1938 and 2023
weather_avg <- weather_mo %>%
  filter(yr > 1938 & yr < 2023) %>%
  group_by(yr) %>%
  summarize(avg_high = mean(tmax), avg_low = mean(tmin))

weather_avg

# Reshape to one row per year and temperature type so both can share one plot
weather_long <- weather_avg %>%
  pivot_longer(
    cols = c(avg_high, avg_low),
    names_to = "temp_type", # which average the row holds
    values_to = "avg_temp" # the average temperature itself
  )

weather_long

Yearly average high and low temperatures plot

Plotting the average high and low temperatures for each year

# Line chart with points: yearly average high and low, one color per temp_type
ggplot(
  data = weather_long,
  mapping = aes(x = yr, y = avg_temp, color = temp_type) # color shared by layers
) +
  geom_point() +
  geom_line() +
  labs(
    x = "Year",
    y = "Average Temperature (fahrenheit)",
    title = "Average Yearly High and Low Temperatures in Austin from 1939-2022",
    subtitle = str_wrap("Average high and low temperatures for each year from the National Centers for Environmental Information's Austin Camp Mabry data"),
    caption = "By Shezan Samanani"
  )