library(tidyverse) # Various helpful utilities
library(sp) # Spatial module
library(DBI) # Database routines
library(odbc) # Database access via ODBC
library(tmap) # Map plotting
# Connect to PostGIS
db <- dbConnect(odbc(), "ghcn")
# Reference the DB tables "ghcn_stations" and "ghcn"
tbl_stations <- db %>% tbl("ghcn_stations")
tbl_stations
## # Source: table<ghcn_stations> [?? x 9]
## # Database: PostgreSQL 9.6.3[postgres@localhost/postgres]
## id lat lon elevation state name
## <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ACW00011604 17.1167 -61.7833 10.1 <NA> ST JOHNS COOLIDGE FLD
## 2 ACW00011647 17.1333 -61.7833 19.2 <NA> ST JOHNS
## 3 AE000041196 25.3330 55.5170 34.0 <NA> SHARJAH INTER. AIRP
## 4 AEM00041194 25.2550 55.3640 10.4 <NA> DUBAI INTL
## 5 AEM00041217 24.4330 54.6510 26.8 <NA> ABU DHABI INTL
## 6 AEM00041218 24.2620 55.6090 264.9 <NA> AL AIN INTL
## 7 AF000040930 35.3170 69.0170 3366.0 <NA> NORTH-SALANG
## 8 AFM00040938 34.2100 62.2280 977.2 <NA> HERAT
## 9 AFM00040948 34.5660 69.2120 1791.3 <NA> KABUL INTL
## 10 AFM00040990 31.5000 65.8500 1010.0 <NA> KANDAHAR AIRPORT
## # ... with more rows, and 3 more variables: gsn <chr>, network <chr>,
## # wmo_id <int>
tbl_observations <- db %>% tbl("ghcn")
tbl_observations
## # Source: table<ghcn> [?? x 8]
## # Database: PostgreSQL 9.6.3[postgres@localhost/postgres]
## id date PRCP TAVG TMAX TMIN year month
## <chr> <int> <int> <dbl> <dbl> <dbl> <int> <int>
## 1 USS0015F14S 20160325 0 27.32 44.24 7.34 2016 3
## 2 USS0015F14S 20160326 76 27.68 53.06 3.38 2016 3
## 3 USS0015F14S 20160327 0 32.90 46.58 17.60 2016 3
## 4 USS0015F14S 20160328 NA 30.38 47.12 8.24 2016 3
## 5 USS0015F14S 20160329 0 39.02 50.54 28.76 2016 3
## 6 USS0015F14S 20160330 0 39.74 64.76 22.64 2016 3
## 7 USS0015F14S 20160331 0 39.20 60.44 21.56 2016 3
## 8 USS0019L03S 20160331 0 35.42 51.98 20.66 2016 3
## 9 USS0019L03S 20160330 0 32.00 40.46 23.18 2016 3
## 10 USS0019L03S 20160329 51 26.96 38.12 18.86 2016 3
## # ... with more rows
# Select the rows in tbl_observations for April 1, 2016,
# and filter out any rows without average temperature data.
# Then do a SQL join with ghcn_stations where we can find
# latitude/longitude data.
data <- tbl_observations %>%
filter(date == "20160401" & !is.na(TAVG)) %>%
left_join(tbl_stations, "id") %>%
collect()
data
## # A tibble: 6,576 x 16
## id date PRCP TAVG TMAX TMIN year month lat
## <chr> <int> <int> <dbl> <dbl> <dbl> <int> <int> <dbl>
## 1 AE000041196 20160401 NA 70.34 NA NA 2016 4 25.3330
## 2 AEM00041194 20160401 NA 70.88 NA NA 2016 4 25.2550
## 3 AEM00041217 20160401 NA 71.06 NA NA 2016 4 24.4330
## 4 AEM00041218 20160401 NA 70.34 NA NA 2016 4 24.2620
## 5 AFM00040938 20160401 NA 49.82 NA NA 2016 4 34.2100
## 6 AFM00040948 20160401 NA 50.72 NA NA 2016 4 34.5660
## 7 AG000060390 20160401 0 57.92 62.60 NA 2016 4 36.7167
## 8 AG000060590 20160401 0 81.68 NA NA 2016 4 30.5667
## 9 AG000060611 20160401 0 76.46 NA 57.02 2016 4 28.0500
## 10 AGE00147708 20160401 0 55.94 62.06 NA 2016 4 36.7200
## # ... with 6,566 more rows, and 7 more variables: lon <dbl>,
## # elevation <dbl>, state <chr>, name <chr>, gsn <chr>, network <chr>,
## # wmo_id <int>
# Tell R which columns contain lon/lat data
coordinates(data) <- ~ lon + lat
# Tell R the CRS for the spatial data
proj4string(data) <- "+init=epsg:4326"
# Create a simple dot plot using Winkel Tripel
data %>%
tm_shape(projection = "+proj=wintri") +
tm_dots(col = "TAVG")

# Calculate the average temp for the month of
# April 2016, per station.
data <- tbl_observations %>%
filter(year == 2016 & month == 4) %>%
group_by(id) %>%
summarise(TAVG = mean(TAVG)) %>%
filter(!is.na(TAVG)) %>%
left_join(tbl_stations, "id") %>%
collect()
coordinates(data) <- ~ lon + lat
proj4string(data) <- "+init=epsg:4326"
data %>%
tm_shape(proj = "+proj=wintri") +
tm_dots(col = "TAVG")

# Let's make a histogram this time, too
ggplot2::qplot(data$TAVG) +
xlab("Avg temp (F)") +
ylab("Station count")
