library(tidyverse)  # Various helpful utilities
library(sp)         # Spatial module
library(DBI)        # Database routines
library(odbc)       # Database access via ODBC
library(tmap)       # Map plotting

# Connect to PostGIS
db <- dbConnect(odbc(), "ghcn")

# Reference the DB tables "ghcn_stations" and "ghcn"
tbl_stations <- db %>% tbl("ghcn_stations")
tbl_stations
## # Source:   table<ghcn_stations> [?? x 9]
## # Database: PostgreSQL 9.6.3[postgres@localhost/postgres]
##             id     lat      lon elevation state                  name
##          <chr>   <dbl>    <dbl>     <dbl> <chr>                 <chr>
##  1 ACW00011604 17.1167 -61.7833      10.1  <NA> ST JOHNS COOLIDGE FLD
##  2 ACW00011647 17.1333 -61.7833      19.2  <NA>              ST JOHNS
##  3 AE000041196 25.3330  55.5170      34.0  <NA>   SHARJAH INTER. AIRP
##  4 AEM00041194 25.2550  55.3640      10.4  <NA>            DUBAI INTL
##  5 AEM00041217 24.4330  54.6510      26.8  <NA>        ABU DHABI INTL
##  6 AEM00041218 24.2620  55.6090     264.9  <NA>           AL AIN INTL
##  7 AF000040930 35.3170  69.0170    3366.0  <NA>          NORTH-SALANG
##  8 AFM00040938 34.2100  62.2280     977.2  <NA>                 HERAT
##  9 AFM00040948 34.5660  69.2120    1791.3  <NA>            KABUL INTL
## 10 AFM00040990 31.5000  65.8500    1010.0  <NA>      KANDAHAR AIRPORT
## # ... with more rows, and 3 more variables: gsn <chr>, network <chr>,
## #   wmo_id <int>
tbl_observations <- db %>% tbl("ghcn")
tbl_observations
## # Source:   table<ghcn> [?? x 8]
## # Database: PostgreSQL 9.6.3[postgres@localhost/postgres]
##             id     date  PRCP  TAVG  TMAX  TMIN  year month
##          <chr>    <int> <int> <dbl> <dbl> <dbl> <int> <int>
##  1 USS0015F14S 20160325     0 27.32 44.24  7.34  2016     3
##  2 USS0015F14S 20160326    76 27.68 53.06  3.38  2016     3
##  3 USS0015F14S 20160327     0 32.90 46.58 17.60  2016     3
##  4 USS0015F14S 20160328    NA 30.38 47.12  8.24  2016     3
##  5 USS0015F14S 20160329     0 39.02 50.54 28.76  2016     3
##  6 USS0015F14S 20160330     0 39.74 64.76 22.64  2016     3
##  7 USS0015F14S 20160331     0 39.20 60.44 21.56  2016     3
##  8 USS0019L03S 20160331     0 35.42 51.98 20.66  2016     3
##  9 USS0019L03S 20160330     0 32.00 40.46 23.18  2016     3
## 10 USS0019L03S 20160329    51 26.96 38.12 18.86  2016     3
## # ... with more rows
# Select the rows in tbl_observations for April 1, 2016,
# and filter out any rows without average temperature data.
# Then do a SQL join with ghcn_stations where we can find
# latitude/longitude data.
data <- tbl_observations %>%
  filter(date == "20160401" & !is.na(TAVG)) %>%
  left_join(tbl_stations, "id") %>% 
  collect()
data
## # A tibble: 6,576 x 16
##             id     date  PRCP  TAVG  TMAX  TMIN  year month     lat
##          <chr>    <int> <int> <dbl> <dbl> <dbl> <int> <int>   <dbl>
##  1 AE000041196 20160401    NA 70.34    NA    NA  2016     4 25.3330
##  2 AEM00041194 20160401    NA 70.88    NA    NA  2016     4 25.2550
##  3 AEM00041217 20160401    NA 71.06    NA    NA  2016     4 24.4330
##  4 AEM00041218 20160401    NA 70.34    NA    NA  2016     4 24.2620
##  5 AFM00040938 20160401    NA 49.82    NA    NA  2016     4 34.2100
##  6 AFM00040948 20160401    NA 50.72    NA    NA  2016     4 34.5660
##  7 AG000060390 20160401     0 57.92 62.60    NA  2016     4 36.7167
##  8 AG000060590 20160401     0 81.68    NA    NA  2016     4 30.5667
##  9 AG000060611 20160401     0 76.46    NA 57.02  2016     4 28.0500
## 10 AGE00147708 20160401     0 55.94 62.06    NA  2016     4 36.7200
## # ... with 6,566 more rows, and 7 more variables: lon <dbl>,
## #   elevation <dbl>, state <chr>, name <chr>, gsn <chr>, network <chr>,
## #   wmo_id <int>
# Tell R which columns contain lon/lat data
coordinates(data) <- ~ lon + lat
# Tell R the CRS for the spatial data
proj4string(data) <- "+init=epsg:4326"

# Create a simple dot plot using Winkel Tripel
data %>% 
  tm_shape(projection = "+proj=wintri") +
  tm_dots(col = "TAVG")

# Calculate the average temp for the month of
# April 2016, per station.
data <- tbl_observations %>%
  filter(year == 2016 & month == 4) %>%
  group_by(id) %>%
  summarise(TAVG = mean(TAVG)) %>%
  filter(!is.na(TAVG)) %>%
  left_join(tbl_stations, "id") %>% 
  collect()

coordinates(data) <- ~ lon + lat
proj4string(data) <- "+init=epsg:4326"

data %>%
  tm_shape(proj = "+proj=wintri") +
  tm_dots(col = "TAVG")

# Let's make a histogram this time, too
ggplot2::qplot(data$TAVG) +
  xlab("Avg temp (F)") +
  ylab("Station count")