Let’s get the weather from San Diego to compare with Olympia.
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Restore the objects saved in resources.rdata into the session
# (presumably this provides the `resources` station table used below).
load(file = "/cloud/project/resources.rdata")
Compute each California station's distance from San Diego, then sort by that distance interactively in View.
# Build a table of California stations with an approximate distance from
# San Diego (reference point 32.715736 N, -117.161087 E).
#
# A degree of longitude covers less ground than a degree of latitude by a
# factor of cos(latitude), so the longitude difference is scaled by
# cos(32.715736 degrees) before the two differences are combined. Without
# that scaling, east-west separation is over-weighted and stations can be
# ranked in the wrong order.
#
# dSanD is expressed in latitude-equivalent degrees (roughly 111 km each);
# it is intended for ranking stations, not as a precise geodesic distance.
ca_stations <- resources %>%
  filter(ST == "CA") %>%
  select(ID, LAT, LON, YR_LEN, NAME) %>%
  mutate(
    dSanD = sqrt(
      (LAT - 32.715736)^2 +
        ((LON - -117.161087) * cos(32.715736 * pi / 180))^2
    )
  )
# View(ca_stations) Done in console
Use the function read_station to download a station's full daily record from NOAA.
# Download the complete daily record for one station from the NOAA NCEI
# GHCN-Daily access directory and return it as a tibble.
#
# Arguments:
#   ID         Station identifier, a single value such as "USW00023188".
#   guess_max  Number of rows read_csv() inspects when guessing column types.
#              With readr's small default, columns that are empty near the
#              top of the file (e.g. the sparse WT*_ATTRIBUTES flags) are
#              guessed as logical, and their later values are dropped with
#              parsing failures. The large default here lets the guess see
#              the whole file for a typical daily record.
#
# Returns:
#   The tibble produced by read_csv() for "<ID>.csv".
read_station <- function(ID, guess_max = 100000) {
  # paste0() would silently build several URLs (or "NA.csv") from bad input,
  # so fail early with a clear message instead.
  stopifnot(length(ID) == 1, !is.na(ID))

  url <- paste0(
    "https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/",
    ID,
    ".csv"
  )

  read_csv(url, guess_max = guess_max)
}
# Fetch the San Diego Lindbergh record (station USW00023188).
# Every column is kept at this stage; the ones we need are picked out later.
SDL_Raw1 <- read_station("USW00023188")
## Parsed with column specification:
## cols(
## .default = col_logical(),
## STATION = col_character(),
## DATE = col_date(format = ""),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## ELEVATION = col_double(),
## NAME = col_character(),
## PRCP = col_double(),
## PRCP_ATTRIBUTES = col_character(),
## SNOW = col_double(),
## SNOW_ATTRIBUTES = col_character(),
## SNWD = col_double(),
## SNWD_ATTRIBUTES = col_character(),
## TMAX = col_double(),
## TMAX_ATTRIBUTES = col_character(),
## TMIN = col_double(),
## TMIN_ATTRIBUTES = col_character(),
## WT01 = col_double(),
## WT01_ATTRIBUTES = col_character(),
## WT03 = col_double(),
## WT03_ATTRIBUTES = col_character()
## # ... with 6 more columns
## )
## See spec(...) for full column specifications.
## Warning: 332203 parsing failures.
## row col expected actual file
## 2074 WT05_ATTRIBUTES 1/0/T/F/TRUE/FALSE ,,0 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## 2075 WT05_ATTRIBUTES 1/0/T/F/TRUE/FALSE ,,0 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## 3105 WT16_ATTRIBUTES 1/0/T/F/TRUE/FALSE ,,X 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## 3128 WT16_ATTRIBUTES 1/0/T/F/TRUE/FALSE ,,X 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## 3129 WT16_ATTRIBUTES 1/0/T/F/TRUE/FALSE ,,X 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/USW00023188.csv'
## .... ............... .................. ...... ...................................................................................................
## See problems(...) for more details.
# Keep only the identifying columns plus precipitation and the daily
# temperature extremes, then inspect ranges and NA counts.
SDL_Raw2 <- select(
  SDL_Raw1,
  STATION, DATE, NAME,
  PRCP, TMAX, TMIN
)
summary(SDL_Raw2)
## STATION DATE NAME PRCP
## Length:29437 Min. :1939-07-01 Length:29437 Min. : 0.00
## Class :character 1st Qu.:1959-08-27 Class :character 1st Qu.: 0.00
## Mode :character Median :1979-10-20 Mode :character Median : 0.00
## Mean :1979-10-19 Mean : 6.91
## 3rd Qu.:1999-12-13 3rd Qu.: 0.00
## Max. :2020-02-05 Max. :686.00
## NA's :1
## TMAX TMIN
## Min. : 78.0 Min. :-17.0
## 1st Qu.:189.0 1st Qu.:111.0
## Median :211.0 Median :144.0
## Mean :214.9 Mean :140.2
## 3rd Qu.:239.0 3rd Qu.:172.0
## Max. :439.0 Max. :256.0
## NA's :2 NA's :2
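It looks like only minimal cleaning is needed: just removing the few rows that contain NA values. We also need to convert the measurements, which are stored as tenths of a degree Celsius and tenths of a millimeter, to degrees Fahrenheit and inches.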
# Clean and convert the San Diego data:
#   1. drop every row that has a missing value in any column;
#   2. rescale temperatures (x 0.1 x 1.8 + 32) and precipitation (/ 254),
#      i.e. tenths of deg C -> deg F and tenths of mm -> inches per the
#      GHCN-Daily unit conventions;
#   3. add year / month / day-of-month columns derived from DATE.
SDL <- na.omit(SDL_Raw2) %>%
  mutate(
    TMAX = TMAX * .1 * 1.8 + 32,
    TMIN = TMIN * .1 * 1.8 + 32,
    PRCP = PRCP / 254
  ) %>%
  mutate(
    yr = year(DATE),
    mo = month(DATE),
    dy = day(DATE)
  )
summary(SDL)
## STATION DATE NAME PRCP
## Length:29434 Min. :1939-07-01 Length:29434 Min. :0.00000
## Class :character 1st Qu.:1959-08-28 Class :character 1st Qu.:0.00000
## Mode :character Median :1979-10-20 Mode :character Median :0.00000
## Mean :1979-10-20 Mean :0.02721
## 3rd Qu.:1999-12-12 3rd Qu.:0.00000
## Max. :2020-02-04 Max. :2.70079
## TMAX TMIN yr mo
## Min. : 46.04 Min. :28.94 Min. :1939 Min. : 1.000
## 1st Qu.: 66.02 1st Qu.:51.98 1st Qu.:1959 1st Qu.: 4.000
## Median : 69.98 Median :57.92 Median :1979 Median : 7.000
## Mean : 70.69 Mean :57.23 Mean :1979 Mean : 6.535
## 3rd Qu.: 75.02 3rd Qu.:62.96 3rd Qu.:1999 3rd Qu.:10.000
## Max. :111.02 Max. :78.08 Max. :2020 Max. :12.000
## dy
## Min. : 1.00
## 1st Qu.: 8.00
## Median :16.00
## Mean :15.73
## 3rd Qu.:23.00
## Max. :31.00
# Write the cleaned SDL data frame to SDL.rdata in the working directory.
save(SDL, file = "SDL.rdata")