The goal of this notebook is to summarize the weather data
Additional quests
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load the processed weather observations from disk.
weather <- read_rds("data-processed/01-weather.rds")

# Peek at the column names, types, and leading values.
glimpse(weather)
## Rows: 30,914
## Columns: 8
## $ station <chr> "USW00013958", "USW00013958", "USW00013958", "USW00013958", "U…
## $ name <chr> "AUSTIN CAMP MABRY, TX US", "AUSTIN CAMP MABRY, TX US", "AUSTI…
## $ date <date> 1938-06-01, 1938-06-02, 1938-06-03, 1938-06-04, 1938-06-05, 1…
## $ prcp <dbl> 0.00, 0.00, 0.00, 0.40, 0.02, 0.00, 0.00, 0.00, 1.60, 0.01, 0.…
## $ snow <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ snwd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tmax <dbl> 91, 94, 94, 90, 94, 92, 95, 92, 87, 90, 92, 91, 91, 91, 89, 89…
## $ tmin <dbl> 72, 67, 70, 68, 68, 70, 70, 76, 64, 76, 75, 71, 70, 68, 71, 70…
Creating a column for the year
# Add a `yr` column holding the calendar year of each observation date.
weather_yr <- mutate(weather, yr = year(date))

glimpse(weather_yr)
## Rows: 30,914
## Columns: 9
## $ station <chr> "USW00013958", "USW00013958", "USW00013958", "USW00013958", "U…
## $ name <chr> "AUSTIN CAMP MABRY, TX US", "AUSTIN CAMP MABRY, TX US", "AUSTI…
## $ date <date> 1938-06-01, 1938-06-02, 1938-06-03, 1938-06-04, 1938-06-05, 1…
## $ prcp <dbl> 0.00, 0.00, 0.00, 0.40, 0.02, 0.00, 0.00, 0.00, 1.60, 0.01, 0.…
## $ snow <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ snwd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tmax <dbl> 91, 94, 94, 90, 94, 92, 95, 92, 87, 90, 92, 91, 91, 91, 89, 89…
## $ tmin <dbl> 72, 67, 70, 68, 68, 70, 70, 76, 64, 76, 75, 71, 70, 68, 71, 70…
## $ yr <dbl> 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 19…
Finding which days had the most rain and how much rain they had
# Ten rainiest days: one row per date/precipitation pair, wettest first.
weather_yr %>%
  distinct(date, prcp) %>%  # keep only date and prcp, dropping duplicate rows
  arrange(desc(prcp)) %>%   # largest precipitation at the top
  slice_head(n = 10)        # first 10 rows only
Finding the hottest and coldest days in history
# Hottest days: every day whose high temperature reached 108 or more.
weather_yr %>%
  filter(tmax >= 108) %>%      # only the extreme highs
  distinct(date, tmax) %>%     # keep only the date and tmax columns
  arrange(desc(tmax))          # hottest first
# Coldest days: every day whose low temperature fell to 12 or less.
weather_yr %>%
  filter(tmin <= 12) %>%       # only the extreme lows
  distinct(date, tmin) %>%     # keep only the date and tmin columns
  arrange(tmin)                # coldest first
Finding the years with the most days of tmax 100+
# Years with the most 100+ degree days: tally the qualifying days per year,
# keep years with at least 42 of them, and retain at most the top five.
weather_100 <- weather_yr %>%
  filter(tmax >= 100) %>%
  count(yr, name = "days", sort = TRUE) %>%  # days per year, most first
  filter(days >= 42) %>%
  slice_head(n = 5)

weather_100
Finding the number of days that had snow each year
# Number of days with measurable snowfall in each year, newest year first.
weather_yr %>%
  filter(snow > 0) %>%                # days with any snowfall recorded
  count(yr, name = "snow_days") %>%   # one row per year with its day count
  arrange(desc(yr))
Finding the number of days each year where the tmin is at or below 32 (freezing)
# Number of days per year whose low was at or below 32, newest year first.
weather_yr %>%
  filter(tmin <= 32) %>%         # low temperature at or below 32
  count(yr, name = "days") %>%   # one row per year with its day count
  arrange(desc(yr))
Finding the number of days each year where the tmax is at or below 32 (freezing)
# Number of days per year whose high never rose above 32, newest year first.
weather_yr %>%
  filter(tmax <= 32) %>%         # high temperature at or below 32
  count(yr, name = "days") %>%   # one row per year with its day count
  arrange(desc(yr))
Finding the number of days where the tmax was 100+ in May each year
# 100+ degree days that fell in May, counted per year, newest year first.
weather_yr %>%
  filter(tmax >= 100, month(date) == 5) %>%  # month 5 is May
  count(yr, name = "days") %>%
  arrange(desc(yr))
Finding the number of days where the tmax was 100+ in June each year
# 100+ degree days that fell in June, counted per year, newest year first.
weather_yr %>%
  filter(tmax >= 100, month(date) == 6) %>%  # month 6 is June
  count(yr, name = "days") %>%
  arrange(desc(yr))
Finding the number of days where the tmax was 100+ in July each year
# 100+ degree days that fell in July, counted per year, newest year first.
weather_yr %>%
  filter(tmax >= 100, month(date) == 7) %>%  # month 7 is July
  count(yr, name = "days") %>%
  arrange(desc(yr))
Required quests
Additional quests
Creating a column for the month and yday
# Add two date-derived columns in a single step:
#   mo   - the month as a labelled (abbreviated) ordered factor
#   yday - the day of the year as a number
weather_mo <- mutate(
  weather_yr,
  mo = month(date, label = TRUE),
  yday = yday(date)
)

glimpse(weather_mo)
## Rows: 30,914
## Columns: 11
## $ station <chr> "USW00013958", "USW00013958", "USW00013958", "USW00013958", "U…
## $ name <chr> "AUSTIN CAMP MABRY, TX US", "AUSTIN CAMP MABRY, TX US", "AUSTI…
## $ date <date> 1938-06-01, 1938-06-02, 1938-06-03, 1938-06-04, 1938-06-05, 1…
## $ prcp <dbl> 0.00, 0.00, 0.00, 0.40, 0.02, 0.00, 0.00, 0.00, 1.60, 0.01, 0.…
## $ snow <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ snwd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tmax <dbl> 91, 94, 94, 90, 94, 92, 95, 92, 87, 90, 92, 91, 91, 91, 89, 89…
## $ tmin <dbl> 72, 67, 70, 68, 68, 70, 70, 76, 64, 76, 75, 71, 70, 68, 71, 70…
## $ yr <dbl> 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 1938, 19…
## $ mo <ord> Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Jun, Ju…
## $ yday <dbl> 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 16…
Finding rainfall amount each year and arranging by greatest amount and least amount
# Wettest years: total precipitation per year, keeping totals above 40.
# 1938 and 2023 are excluded (the data begins mid-1938; 2023 is presumably
# incomplete as well - confirm against the source data).
weather_mo %>%
  filter(yr > 1938, yr < 2023) %>%
  group_by(yr) %>%
  summarize(rain_amount = sum(prcp)) %>%  # yearly precipitation total
  filter(rain_amount > 40) %>%
  arrange(desc(rain_amount))              # wettest year first
# Driest years: total precipitation per year, keeping totals below 25.
# 1938 and 2023 are excluded (the data begins mid-1938; 2023 is presumably
# incomplete as well - confirm against the source data).
weather_mo %>%
  filter(yr > 1938, yr < 2023) %>%
  group_by(yr) %>%
  summarize(rain_amount = sum(prcp)) %>%  # yearly precipitation total
  filter(rain_amount < 25) %>%
  arrange(rain_amount)                    # driest year first
Finding how much snowfall occurred in each year
# Snowiest years: total snowfall per year, keeping totals above 1.
# 1938 and 2023 are excluded (the data begins mid-1938; 2023 is presumably
# incomplete as well - confirm against the source data).
weather_mo %>%
  filter(yr > 1938, yr < 2023) %>%
  group_by(yr) %>%
  summarize(snowfall_amount = sum(snow)) %>%  # yearly snowfall total
  filter(snowfall_amount > 1) %>%
  arrange(desc(snowfall_amount))              # snowiest year first
Finding the average rainfall for each month over all the years
# Average rainfall per calendar month across all years.
# Step 1: total the precipitation within each month of each year.
# Step 2: average those monthly totals for each calendar month.
# `.groups = "drop"` states the intended grouping explicitly, so summarize()
# no longer falls back to its default (grouped by `mo`) and prints a message.
weather_rainfall <- weather_mo %>%
  group_by(mo, yr) %>%
  summarize(total_rainfall = sum(prcp), .groups = "drop") %>%
  group_by(mo) %>%
  summarize(average_rainfall = mean(total_rainfall))

weather_rainfall
What is the earliest date in each year with 100+ temperature? Which year had the earliest date?
Finding the first day each year where tmax was 100+, and which year that happened first
# First 100+ degree day of each year, ordered so the year that reached 100
# earliest in the calendar comes first.
weather_mo %>%
  filter(tmax >= 100) %>%       # only 100+ degree days
  group_by(yr) %>%
  slice_min(date, n = 1) %>%    # earliest qualifying date within each year
  ungroup() %>%                 # do not leave the result grouped by yr
  select(date, tmax, tmin, yr, mo, yday) %>%
  arrange(yday)                 # earliest day-of-year first
Finding the first day each year on or after July 1 that was freezing, and which year that happened first
# First freezing day (low at or below 32) in the second half of each year,
# ordered so the year with the earliest such freeze comes first.
# The month is compared numerically: the labels in `mo` depend on the
# session locale, so comparing against the string "Jul" is fragile.
weather_mo %>%
  filter(
    tmin <= 32,
    month(date) >= 7            # July through December
  ) %>%
  group_by(yr) %>%
  slice_min(date, n = 1) %>%    # earliest qualifying date within each year
  ungroup() %>%                 # do not leave the result grouped by yr
  select(date, tmax, tmin, yr, mo, yday) %>%
  arrange(yday)                 # earliest day-of-year first
# First freezing day (low at or below 32) in the first half of each year,
# ordered by day of the year.
# The month is compared numerically: the labels in `mo` depend on the
# session locale, so comparing against the string "Jul" is fragile.
# NOTE(review): this keeps the EARLIEST January-June freeze per year. If the
# intent was the last freeze of the season, slice_max() would be needed -
# confirm.
weather_mo %>%
  filter(
    tmin <= 32,
    month(date) < 7             # January through June
  ) %>%
  group_by(yr) %>%
  slice_min(date, n = 1) %>%    # earliest qualifying date within each year
  ungroup() %>%                 # do not leave the result grouped by yr
  select(date, tmax, tmin, yr, mo, yday) %>%
  arrange(yday)                 # earliest day-of-year first
Plotting the top years with the most 100+ days
# Horizontal bar chart of the years in `weather_100`, ordered by day count.
weather_100 %>%
  ggplot(aes(x = reorder(yr, days), y = days)) +  # years ordered by days
  geom_col() +
  coord_flip() +                                  # years on the vertical axis
  # print each bar's day count in white text
  geom_text(aes(label = days), hjust = 2, color = "white") +
  labs(
    title = "Top Five Years With the Most 100+ Degree Days in Austin from 1938-2023",
    subtitle = str_wrap("Most 100+ days each year from the National Centers for Environmental Information's Austin Camp Mabry data"),
    caption = "By Shezan Samanani",
    x = "Year",
    y = "Number of Days"
  )
Plotting the average rainfall by month
# Bar chart of the average rainfall for each calendar month.
weather_rainfall %>%
  ggplot(aes(x = mo, y = average_rainfall)) +
  geom_col() +
  labs(
    title = "Average Rainfall by Month in Austin from 1938-2023",
    subtitle = str_wrap("Average rainfall amount in each month from the National Centers for Environmental Information's Austin Camp Mabry data"),
    caption = "By Shezan Samanani",
    x = "Month",
    y = "Average Rainfall Amount (inches)"
  )
Finding the yearly average high and low temperatures and pivoting long
# Mean daily high and low temperature for each year from 1939 through 2022.
# The years are restricted before summarizing; the per-year result is the
# same because the restriction is on the grouping column itself.
weather_avg <- weather_mo %>%
  filter(yr > 1938, yr < 2023) %>%
  group_by(yr) %>%
  summarize(
    avg_high = mean(tmax),
    avg_low = mean(tmin)
  )

weather_avg
# Reshape to long form: one row per year and temperature type, with the type
# name in `temp_type` and its value in `avg_temp`.
weather_long <- pivot_longer(
  weather_avg,
  cols = c(avg_high, avg_low),
  names_to = "temp_type",
  values_to = "avg_temp"
)

weather_long
Plotting the average high and low temperatures for each year
# Line chart with points of the yearly average high and low temperatures.
# The colour mapping is declared once so both layers share it.
weather_long %>%
  ggplot(aes(x = yr, y = avg_temp, color = temp_type)) +
  geom_point() +
  geom_line() +
  labs(
    title = "Average Yearly High and Low Temperatures in Austin from 1939-2022",
    subtitle = str_wrap("Average high and low temperatures for each year from the National Centers for Environmental Information's Austin Camp Mabry data"),
    caption = "By Shezan Samanani",
    x = "Year",
    y = "Average Temperature (fahrenheit)"
  )