Build OAW2309

Harold Nelson

2023-10-03

Setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1     ✔ purrr   1.0.1
## ✔ tibble  3.2.1     ✔ dplyr   1.1.1
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Import the Data

Solution

 # Read the downloaded CSV. DATE is forced to character; the types of the
 # remaining columns are left for read_csv() to guess.
 OAW2309 <- read_csv(
   "~/Downloads/3476420.csv",
   col_types = cols(DATE = col_character())
 )

Examine the Data

Solution

# Per-column summary of the raw import, including the NA counts.
OAW2309 %>%
  summary()
##    STATION              NAME              LATITUDE       LONGITUDE     
##  Length:30091       Length:30091       Min.   :46.97   Min.   :-122.9  
##  Class :character   Class :character   1st Qu.:46.97   1st Qu.:-122.9  
##  Mode  :character   Mode  :character   Median :46.97   Median :-122.9  
##                                        Mean   :46.97   Mean   :-122.9  
##                                        3rd Qu.:46.97   3rd Qu.:-122.9  
##                                        Max.   :46.97   Max.   :-122.9  
##                                                                        
##    ELEVATION        DATE                PRCP             TMAX       
##  Min.   :60.9   Length:30091       Min.   :0.0000   Min.   : 18.00  
##  1st Qu.:60.9   Class :character   1st Qu.:0.0000   1st Qu.: 50.00  
##  Median :60.9   Mode  :character   Median :0.0000   Median : 59.00  
##  Mean   :60.9                      Mean   :0.1362   Mean   : 60.61  
##  3rd Qu.:60.9                      3rd Qu.:0.1400   3rd Qu.: 71.00  
##  Max.   :60.9                      Max.   :4.8200   Max.   :110.00  
##                                    NA's   :3        NA's   :13      
##       TMIN      
##  Min.   :-8.00  
##  1st Qu.:33.00  
##  Median :40.00  
##  Mean   :39.86  
##  3rd Qu.:47.00  
##  Max.   :69.00  
##  NA's   :13
# Compact listing of each column's type and its first few values.
OAW2309 %>%
  glimpse()
## Rows: 30,091
## Columns: 9
## $ STATION   <chr> "USW00024227", "USW00024227", "USW00024227", "USW00024227", …
## $ NAME      <chr> "OLYMPIA AIRPORT, WA US", "OLYMPIA AIRPORT, WA US", "OLYMPIA…
## $ LATITUDE  <dbl> 46.97371, 46.97371, 46.97371, 46.97371, 46.97371, 46.97371, …
## $ LONGITUDE <dbl> -122.9049, -122.9049, -122.9049, -122.9049, -122.9049, -122.…
## $ ELEVATION <dbl> 60.9, 60.9, 60.9, 60.9, 60.9, 60.9, 60.9, 60.9, 60.9, 60.9, …
## $ DATE      <chr> "1941-05-13", "1941-05-14", "1941-05-15", "1941-05-16", "194…
## $ PRCP      <dbl> 0.00, 0.00, 0.30, 1.08, 0.06, 0.00, 0.00, 0.00, 0.00, 0.00, …
## $ TMAX      <dbl> 66, 63, 58, 55, 57, 59, 58, 65, 68, 85, 84, 75, 72, 59, 61, …
## $ TMIN      <dbl> 50, 47, 44, 45, 46, 39, 40, 50, 42, 46, 46, 50, 41, 37, 48, …

When Do NA Values Occur?

Solution

# Show every row in which at least one of the three measurements is missing.
OAW2309 %>%
  filter(if_any(c(PRCP, TMAX, TMIN), is.na))
## # A tibble: 16 × 9
##    STATION     NAME         LATITUDE LONGITUDE ELEVATION DATE   PRCP  TMAX  TMIN
##    <chr>       <chr>           <dbl>     <dbl>     <dbl> <chr> <dbl> <dbl> <dbl>
##  1 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1996… NA       39    33
##  2 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1996…  0.12    67    NA
##  3 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1996… NA       NA    NA
##  4 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1996… NA       NA    NA
##  5 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0       NA    28
##  6 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0       61    NA
##  7 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0       NA    28
##  8 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0.39    NA    NA
##  9 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0.35    NA    NA
## 10 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0       NA    NA
## 11 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0       NA    NA
## 12 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0       NA    NA
## 13 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0       NA    NA
## 14 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 1997…  0       NA    NA
## 15 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 2023…  0       NA    NA
## 16 USW00024227 OLYMPIA AIR…     47.0     -123.      60.9 2023…  0.02    NA    NA

Get Rid of NA Values

Solution

# Remove every row that contains an NA in any column, then re-run summary()
# to confirm that no NA counts remain.
OAW2309 <- OAW2309 %>%
  drop_na()

summary(OAW2309)
##    STATION              NAME              LATITUDE       LONGITUDE     
##  Length:30075       Length:30075       Min.   :46.97   Min.   :-122.9  
##  Class :character   Class :character   1st Qu.:46.97   1st Qu.:-122.9  
##  Mode  :character   Mode  :character   Median :46.97   Median :-122.9  
##                                        Mean   :46.97   Mean   :-122.9  
##                                        3rd Qu.:46.97   3rd Qu.:-122.9  
##                                        Max.   :46.97   Max.   :-122.9  
##    ELEVATION        DATE                PRCP             TMAX       
##  Min.   :60.9   Length:30075       Min.   :0.0000   Min.   : 18.00  
##  1st Qu.:60.9   Class :character   1st Qu.:0.0000   1st Qu.: 50.00  
##  Median :60.9   Mode  :character   Median :0.0000   Median : 59.00  
##  Mean   :60.9                      Mean   :0.1362   Mean   : 60.62  
##  3rd Qu.:60.9                      3rd Qu.:0.1400   3rd Qu.: 71.00  
##  Max.   :60.9                      Max.   :4.8200   Max.   :110.00  
##       TMIN      
##  Min.   :-8.00  
##  1st Qu.:33.00  
##  Median :40.00  
##  Mean   :39.86  
##  3rd Qu.:47.00  
##  Max.   :69.00

Drop STATION, NAME, LATITUDE, LONGITUDE, and ELEVATION

Solution

# Keep only the date and the three measurement columns; all other columns
# are discarded.
OAW2309 <- OAW2309 %>%
  select(DATE, PRCP, TMAX, TMIN)

summary(OAW2309)
##      DATE                PRCP             TMAX             TMIN      
##  Length:30075       Min.   :0.0000   Min.   : 18.00   Min.   :-8.00  
##  Class :character   1st Qu.:0.0000   1st Qu.: 50.00   1st Qu.:33.00  
##  Mode  :character   Median :0.0000   Median : 59.00   Median :40.00  
##                     Mean   :0.1362   Mean   : 60.62   Mean   :39.86  
##                     3rd Qu.:0.1400   3rd Qu.: 71.00   3rd Qu.:47.00  
##                     Max.   :4.8200   Max.   :110.00   Max.   :69.00

Add Date Info

Do a little research on the lubridate functions year(), month(), and day(). Use them to add yr, mo, and dy to the dataframe.

Solution

# Derive calendar components from the character DATE column with lubridate.
# mo is stored as a factor; yr and dy remain numeric.
OAW2309 <- OAW2309 %>%
  mutate(
    yr = year(DATE),
    mo = factor(month(DATE)),
    dy = day(DATE)
  )

summary(OAW2309)
##      DATE                PRCP             TMAX             TMIN      
##  Length:30075       Min.   :0.0000   Min.   : 18.00   Min.   :-8.00  
##  Class :character   1st Qu.:0.0000   1st Qu.: 50.00   1st Qu.:33.00  
##  Mode  :character   Median :0.0000   Median : 59.00   Median :40.00  
##                     Mean   :0.1362   Mean   : 60.62   Mean   :39.86  
##                     3rd Qu.:0.1400   3rd Qu.: 71.00   3rd Qu.:47.00  
##                     Max.   :4.8200   Max.   :110.00   Max.   :69.00  
##                                                                      
##        yr             mo              dy       
##  Min.   :1941   8      : 2573   Min.   : 1.00  
##  1st Qu.:1961   7      : 2572   1st Qu.: 8.00  
##  Median :1982   5      : 2556   Median :16.00  
##  Mean   :1982   10     : 2542   Mean   :15.74  
##  3rd Qu.:2003   1      : 2541   3rd Qu.:23.00  
##  Max.   :2023   3      : 2540   Max.   :31.00  
##                 (Other):14751
# Peek at the first rows to check the newly added yr, mo, and dy columns.
OAW2309 %>%
  head()
## # A tibble: 6 × 7
##   DATE        PRCP  TMAX  TMIN    yr mo       dy
##   <chr>      <dbl> <dbl> <dbl> <dbl> <fct> <int>
## 1 1941-05-13  0       66    50  1941 5        13
## 2 1941-05-14  0       63    47  1941 5        14
## 3 1941-05-15  0.3     58    44  1941 5        15
## 4 1941-05-16  1.08    55    45  1941 5        16
## 5 1941-05-17  0.06    57    46  1941 5        17
## 6 1941-05-18  0       59    39  1941 5        18

Save the file

# The save() call is left commented out, presumably so the file is not
# rewritten each time the document is knit.
#save(OAW2309,file = "OAW2309.Rdata")

Save the Dataframe

Solution

# save(OAW2309,file = "OAW2309.Rdata")
# The save() call above is commented out so that it does not run while
# knitting; uncomment it to write the data frame to disk.