San Diego

Let’s get the weather from San Diego to compare with Olympia.

Libraries

library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.3
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date

Data

# Restore the objects saved in the project resource file; the `resources`
# station table used below presumably comes from here.
load(file = "/cloud/project/resources.rdata")

Get California Stations

Compute each station's distance from San Diego, then sort by that distance in View() to find the nearest stations.

# Build the table of California stations and add dSanD: the straight-line
# distance from the San Diego reference point (32.715736, -117.161087),
# expressed in the same units as LAT/LON (presumably degrees).
ca_stations <- resources %>%
  filter(ST == "CA") %>%
  select(ID, LAT, LON, YR_LEN, NAME) %>%
  mutate(
    dSanD = sqrt((LAT - 32.715736)^2 + (LON + 117.161087)^2)
  )
# The table was inspected and sorted interactively in the console with
# View(ca_stations), so no sorting happens here.

Get SDL Data

Use the function read_station

# Download the complete daily record for one station from NOAA's
# GHCN-Daily "access" directory.
#
# ID: a single station identifier (e.g. "USW00023188"). It is pasted into
#     the URL as the name of the CSV file to fetch.
#
# Returns the data frame produced by read_csv() for that station's file.
read_station <- function(ID) {
  # A vector, NULL or NA identifier would build a malformed request,
  # so fail early with a clear error instead.
  stopifnot(length(ID) == 1L, !is.na(ID))

  url <- paste0(
    "https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/",
    ID,
    ".csv"
  )

  # Sparse columns (e.g. WT05_ATTRIBUTES) are empty in the first rows of
  # the file. With the default guess window they are typed as logical and
  # every later value is reported as a parsing failure and replaced by NA.
  # Guessing from far more rows lets those columns get their real type.
  read_csv(url, guess_max = 100000)
}



# San Diego Lindbergh station: download the complete record,
# keeping every column for now.
SDL_Raw1 <- read_station("USW00023188")
## Parsed with column specification:
## cols(
##   .default = col_logical(),
##   STATION = col_character(),
##   DATE = col_date(format = ""),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   ELEVATION = col_double(),
##   NAME = col_character(),
##   PRCP = col_double(),
##   PRCP_ATTRIBUTES = col_character(),
##   SNOW = col_double(),
##   SNOW_ATTRIBUTES = col_character(),
##   SNWD = col_double(),
##   SNWD_ATTRIBUTES = col_character(),
##   TMAX = col_double(),
##   TMAX_ATTRIBUTES = col_character(),
##   TMIN = col_double(),
##   TMIN_ATTRIBUTES = col_character(),
##   WT01 = col_double(),
##   WT01_ATTRIBUTES = col_character(),
##   WT03 = col_double(),
##   WT03_ATTRIBUTES = col_character()
##   # ... with 6 more columns
## )
## See spec(...) for full column specifications.
## Warning: 332203 parsing failures.
##  row             col           expected actual                                                                                                file
## 2074 WT05_ATTRIBUTES 1/0/T/F/TRUE/FALSE    ,,0 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## 2075 WT05_ATTRIBUTES 1/0/T/F/TRUE/FALSE    ,,0 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## 3105 WT16_ATTRIBUTES 1/0/T/F/TRUE/FALSE    ,,X 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## 3128 WT16_ATTRIBUTES 1/0/T/F/TRUE/FALSE    ,,X 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## 3129 WT16_ATTRIBUTES 1/0/T/F/TRUE/FALSE    ,,X 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## .... ............... .................. ...... ...................................................................................................
## See problems(...) for more details.
# Keep only the station/date identifiers plus the precipitation and
# temperature measurements.
SDL_Raw2 <- select(SDL_Raw1, STATION, DATE, NAME, PRCP, TMAX, TMIN)

# Check value ranges and how many observations are missing.
summary(SDL_Raw2)
##    STATION               DATE                NAME                PRCP       
##  Length:29437       Min.   :1939-07-01   Length:29437       Min.   :  0.00  
##  Class :character   1st Qu.:1959-08-27   Class :character   1st Qu.:  0.00  
##  Mode  :character   Median :1979-10-20   Mode  :character   Median :  0.00  
##                     Mean   :1979-10-19                      Mean   :  6.91  
##                     3rd Qu.:1999-12-13                      3rd Qu.:  0.00  
##                     Max.   :2020-02-05                      Max.   :686.00  
##                                                             NA's   :1       
##       TMAX            TMIN      
##  Min.   : 78.0   Min.   :-17.0  
##  1st Qu.:189.0   1st Qu.:111.0  
##  Median :211.0   Median :144.0  
##  Mean   :214.9   Mean   :140.2  
##  3rd Qu.:239.0   3rd Qu.:172.0  
##  Max.   :439.0   Max.   :256.0  
##  NA's   :2       NA's   :2

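It looks like only minimal cleaning is needed: just removing the small number of rows with NA values. We also need to convert the measurements from the units in the raw file (temperatures in tenths of a degree Celsius, precipitation in tenths of a millimeter) to degrees Fahrenheit and inches.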

# Drop the rows that contain any missing value, convert the measurements,
# and split the date into year / month / day columns.
SDL <- mutate(
  na.omit(SDL_Raw2),
  # Scale by 0.1, then apply the Celsius-to-Fahrenheit formula.
  TMAX = TMAX * 0.1 * 1.8 + 32,
  TMIN = TMIN * 0.1 * 1.8 + 32,
  # 254 = 10 * 25.4: presumably tenths of a millimeter to inches.
  PRCP = PRCP / 254,
  yr = year(DATE),
  mo = month(DATE),
  dy = day(DATE)
)

# Confirm the converted ranges look plausible.
summary(SDL)
##    STATION               DATE                NAME                PRCP        
##  Length:29434       Min.   :1939-07-01   Length:29434       Min.   :0.00000  
##  Class :character   1st Qu.:1959-08-28   Class :character   1st Qu.:0.00000  
##  Mode  :character   Median :1979-10-20   Mode  :character   Median :0.00000  
##                     Mean   :1979-10-20                      Mean   :0.02721  
##                     3rd Qu.:1999-12-12                      3rd Qu.:0.00000  
##                     Max.   :2020-02-04                      Max.   :2.70079  
##       TMAX             TMIN             yr             mo        
##  Min.   : 46.04   Min.   :28.94   Min.   :1939   Min.   : 1.000  
##  1st Qu.: 66.02   1st Qu.:51.98   1st Qu.:1959   1st Qu.: 4.000  
##  Median : 69.98   Median :57.92   Median :1979   Median : 7.000  
##  Mean   : 70.69   Mean   :57.23   Mean   :1979   Mean   : 6.535  
##  3rd Qu.: 75.02   3rd Qu.:62.96   3rd Qu.:1999   3rd Qu.:10.000  
##  Max.   :111.02   Max.   :78.08   Max.   :2020   Max.   :12.000  
##        dy       
##  Min.   : 1.00  
##  1st Qu.: 8.00  
##  Median :16.00  
##  Mean   :15.73  
##  3rd Qu.:23.00  
##  Max.   :31.00
# Write the cleaned SDL table to SDL.rdata (path is relative to the
# current working directory).
save(list = "SDL", file = "SDL.rdata")