OAW Data

Harold Nelson

2025-06-24

OAW Data

First, walk through the process of getting the data from GHCN (the Global Historical Climatology Network).

Setup

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import

# readr provides read_csv(); attach it explicitly for the import step.
library(readr)
# Read the downloaded GHCN CSV export; readr guesses the column types.
OAW2506 <- read_csv(file = "~/Downloads/4054432.csv")
## Rows: 30721 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): STATION
## dbl  (3): PRCP, TMAX, TMIN
## date (1): DATE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect the imported tibble: column names, types and leading values.
utils::str(object = OAW2506)
## spc_tbl_ [30,721 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ STATION: chr [1:30721] "USW00024227" "USW00024227" "USW00024227" "USW00024227" ...
##  $ DATE   : Date[1:30721], format: "1941-05-13" "1941-05-14" ...
##  $ PRCP   : num [1:30721] 0 0 0.3 1.08 0.06 0 0 0 0 0 ...
##  $ TMAX   : num [1:30721] 66 63 58 55 57 59 58 65 68 85 ...
##  $ TMIN   : num [1:30721] 50 47 44 45 46 39 40 50 42 46 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   STATION = col_character(),
##   ..   DATE = col_date(format = ""),
##   ..   PRCP = col_double(),
##   ..   TMAX = col_double(),
##   ..   TMIN = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Create New Columns

# Add calendar components derived from DATE and drop the station identifier.
# month(), day() and year() come from lubridate, attached by library(tidyverse).
OAW2506 <- OAW2506 %>%
  mutate(
    mo = month(DATE),
    dy = day(DATE),
    yr = year(DATE)
  ) %>%
  # any_of() tolerates STATION already being absent, so this chunk can be
  # re-run even though it overwrites OAW2506 in place.
  select(-any_of("STATION"))

# Print the result to confirm the three new columns.
OAW2506
## # A tibble: 30,721 × 7
##    DATE        PRCP  TMAX  TMIN    mo    dy    yr
##    <date>     <dbl> <dbl> <dbl> <dbl> <int> <dbl>
##  1 1941-05-13  0       66    50     5    13  1941
##  2 1941-05-14  0       63    47     5    14  1941
##  3 1941-05-15  0.3     58    44     5    15  1941
##  4 1941-05-16  1.08    55    45     5    16  1941
##  5 1941-05-17  0.06    57    46     5    17  1941
##  6 1941-05-18  0       59    39     5    18  1941
##  7 1941-05-19  0       58    40     5    19  1941
##  8 1941-05-20  0       65    50     5    20  1941
##  9 1941-05-21  0       68    42     5    21  1941
## 10 1941-05-22  0       85    46     5    22  1941
## # ℹ 30,711 more rows

Make Longer

Collapse the PRCP, TMAX, and TMIN columns into a single column named metric, with the corresponding measurements in a column named value.

Solution

# Reshape to long form: one row per (DATE, metric) pair.
# The measurement columns are named explicitly rather than by the positional
# range PRCP:TMIN, so the result does not depend on column order.
longer <- OAW2506 %>%
  pivot_longer(
    cols = c(PRCP, TMAX, TMIN),
    names_to = "metric",
    values_to = "value"
  )

# Print the long table; it has three rows for every row of OAW2506.
longer
## # A tibble: 92,163 × 6
##    DATE          mo    dy    yr metric value
##    <date>     <dbl> <int> <dbl> <chr>  <dbl>
##  1 1941-05-13     5    13  1941 PRCP    0   
##  2 1941-05-13     5    13  1941 TMAX   66   
##  3 1941-05-13     5    13  1941 TMIN   50   
##  4 1941-05-14     5    14  1941 PRCP    0   
##  5 1941-05-14     5    14  1941 TMAX   63   
##  6 1941-05-14     5    14  1941 TMIN   47   
##  7 1941-05-15     5    15  1941 PRCP    0.3 
##  8 1941-05-15     5    15  1941 TMAX   58   
##  9 1941-05-15     5    15  1941 TMIN   44   
## 10 1941-05-16     5    16  1941 PRCP    1.08
## # ℹ 92,153 more rows

Wider

Pivot longer back to the original wide layout and store the result in wider.

# Reverse the pivot: spread each metric back into its own column.
# The identifying columns (DATE, mo, dy, yr) come first in the result, so the
# column order differs from OAW2506 even though the contents match.
wider <- longer %>%
  pivot_wider(
    names_from = metric,
    values_from = value
  )

# Print the wide table to compare against OAW2506.
wider
## # A tibble: 30,721 × 7
##    DATE          mo    dy    yr  PRCP  TMAX  TMIN
##    <date>     <dbl> <int> <dbl> <dbl> <dbl> <dbl>
##  1 1941-05-13     5    13  1941  0       66    50
##  2 1941-05-14     5    14  1941  0       63    47
##  3 1941-05-15     5    15  1941  0.3     58    44
##  4 1941-05-16     5    16  1941  1.08    55    45
##  5 1941-05-17     5    17  1941  0.06    57    46
##  6 1941-05-18     5    18  1941  0       59    39
##  7 1941-05-19     5    19  1941  0       58    40
##  8 1941-05-20     5    20  1941  0       65    50
##  9 1941-05-21     5    21  1941  0       68    42
## 10 1941-05-22     5    22  1941  0       85    46
## # ℹ 30,711 more rows