This is a short demonstration of how I managed to pull some environmental data from a website using R and then creating some different visualizations.
The objectives I gave myself in this exercise were to;
Some id variables that were gathered manually and will be used with
the html link.
print(sites)
## # A tibble: 6 × 12
## id site wd wg ws at rf cu ht sm weather rivers
## <chr> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
## 1 8 Kapoaiai… TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## 2 32 Stony at… FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE FALSE TRUE
## 3 33 Taungata… TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## 4 4 Waiwhaka… TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## 5 42 Waitara … FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE TRUE
## 6 24 North Eg… FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
print(measures)
## # A tibble: 8 × 5
## label id desc weather river
## <chr> <chr> <chr> <lgl> <lgl>
## 1 wg 10 Wind Gust TRUE FALSE
## 2 wd 4 Wind Direction TRUE FALSE
## 3 ws 2 Wind Speed TRUE FALSE
## 4 at 3 Air Temperature TRUE FALSE
## 5 rf 1 Rainfall TRUE TRUE
## 6 cu 9 River Flow FALSE TRUE
## 7 ht 7 River Level FALSE TRUE
## 8 sm 5 Soil Moisture FALSE TRUE
Some holders that will receive variable data as we gather the data by reading the website
site_link = "?siteID="
measure_link = "&measureID="
time_link = "&timePeriod="
date_link = "&fromDate="
Generate a list of dates to be used to iterate through the web
data.
In this case 3 years worth of data
dates.from <- tribble(~dt, today())
for (d in 1:3){
dates.from = rbind(dates.from, c(today() - (d * 365) ))
}
dates.from <- dates.from[-1,]
We just want weather specific measures, so filter sites and measures accordingly.
measures_t <- measures %>% filter(., weather == TRUE)
sites_weather <- sites %>% filter(., weather == TRUE)
Now lets look at objective 1 and retrieve some data.
rawdata.3 <- tribble(~dte_tme, ~value, ~site_id, ~measure)
# Loop by year( as i ), then by site( as s ), then by measure(as k )
for (i in 1:3){
date_link_t = paste0(date_link, dates.from$dt[i])
for (s in 1:nrow(sites_weather)){
site_link_t = paste0(site_link, sites_weather$id[s])
for (k in 1:5){
measure_link_t = paste0(measure_link, measures_t$id[k])
link = paste0(base_link,
site_link_t,
measure_link_t,
paste0(time_link, "365days"),
date_link_t)
rawdata = fromJSON(link)
rawdata2 = rawdata$highStockData
rawdata2 <- as.data.frame(rawdata2)
colnames(rawdata2)[1:2] <- c("dte_tme", "value")
rawdata2 <- rawdata2 %>%
mutate(site_id = sites_weather$id[s]) %>%
mutate(measure = measures_t$id[k]) %>%
mutate(dte_tme = dte_tme / 1000) %>%
mutate(as_dte_tme = as.POSIXct(dte_tme))
rawdata.3 <- rbind(rawdata.3, rawdata2)
}
}
}
A quick look at a snippet of the raw data we have pulled from the
website.
We now have 3 years of weather data for any site that
measures weather type data.
head(rawdata.3, 10)
## dte_tme value site_id measure as_dte_tme
## 1 1669460400 24.2 8 10 2022-11-27
## 2 1669546800 26.6 8 10 2022-11-28
## 3 1669633200 29.9 8 10 2022-11-29
## 4 1669719600 26.9 8 10 2022-11-30
## 5 1669806000 29.2 8 10 2022-12-01
## 6 1669892400 36.3 8 10 2022-12-02
## 7 1669978800 30.6 8 10 2022-12-03
## 8 1670065200 23.2 8 10 2022-12-04
## 9 1670151600 19.2 8 10 2022-12-05
## 10 1670238000 21.1 8 10 2022-12-06
Now lets move on to objective 2 and present some wind data in a more
intuitive way.
First we need to pull out the wind data from raw
data and do a bit of cleaning and summarising.
wind_data_2 <- filter(rawdata.3, measure == 2 | measure == 4) %>%
distinct() %>%
pivot_wider(.,
names_from = "measure",
values_from = "value",
values_fill = 0) %>%
select(-1) %>%
mutate(year = as.numeric(year(as_dte_tme))) %>%
mutate(month = as.numeric(month(as_dte_tme))) %>%
mutate(month_name = month(as_dte_tme, abbr = TRUE, label = TRUE))
colnames(wind_data_2)[3] <- "degrees_clean"
colnames(wind_data_2)[4] <- "ws"
wind_data_2 <- wind_data_2 %>%
mutate(spd_bin = paste(round_any(ws, 10, f = floor), "-", round_any(ws, 10, f = ceiling)))
wind_data_2 <- merge(wind_data_2, sites, by.x = "site_id", by.y = "id") %>%
arrange(site, as_dte_tme) %>%
select(-10:-18) %>%
mutate(step_number = 1:nrow(.))
Here is a link where you can view the current presentation of wind
data.
https://www.trc.govt.nz/environment/maps-and-data/site-details/?siteID=8&measureID=4&timePeriod=30days
You have to switch to a secondary view for wind speed.
Lets
combine both wind direction and wind speed into a windrose.
plot.windrose.2 <- ggplot(wind_data_2, aes(x = degrees_clean, fill = spd_bin, position = site)) +
geom_histogram(binwidth = 15, boundary = -7.5, colour = "black", linewidth = .25) +
guides(fill = guide_legend(reverse = TRUE)) +
coord_polar() +
scale_x_continuous(limits = c(0,360),
breaks = seq(0, 360, by = 45),
labels = c("N", "NE", "E", "SE", "S", "SW", "W", "NW", "N"),
minor_breaks = seq(0, 360, by = 15)) +
scale_fill_brewer(palette = "YIOrRd") +
labs(title = "Wind Direction and Speed by Site: ",
subtitle = "Three years of data combined",
caption = "Data sourced from trc.govt.nz",
x = "",
y = "",
fill = "Speed (km/h)") +
theme(panel.background = element_rect(fill = 'lightblue', color = 'darkblue'),
panel.grid.minor = element_line(linetype = 'dotted'),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.text = element_text(size = 6),
legend.title = element_text(size = 8),
legend.position = "right",
legend.background = element_rect(fill = "lightblue")) +
facet_wrap(~site, ncol = 2)
plot.windrose.2
For a bit more fun, lets animate this data so we can get a feel of what
the wind speed and direction is month by month.
We’ll just
look at Kapoaiaia for this exercise as this is my closest weather
station.
wind_data_2 <- wind_data_2 %>%
filter(site_id == 8)
plot.windrose.2 <- ggplot(wind_data_2, aes(x = degrees_clean, fill = spd_bin, position = site)) +
geom_histogram(binwidth = 15, boundary = -7.5, colour = "black", linewidth = .25) +
guides(fill = guide_legend(reverse = TRUE)) +
coord_polar() +
scale_x_continuous(limits = c(0,360),
breaks = seq(0, 360, by = 45),
labels = c("N", "NE", "E", "SE", "S", "SW", "W", "NW", "N"),
minor_breaks = seq(0, 360, by = 15)) +
scale_fill_brewer(palette = "YIOrRd") +
labs(title = "Wind Direction and Speed for Site : Kapoaiaia at Cape Egmont",
subtitle = "Month: {month.abb[as.numeric(frame_time)]}",
caption = "Data sourced from trc.govt.nz",
x = "",
y = "",
fill = "Speed (km/h)") +
theme(panel.background = element_rect(fill = 'lightblue', color = 'darkblue'),
panel.grid.minor = element_line(linetype = 'dotted'),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.text = element_text(size = 6),
legend.title = element_text(size = 8),
legend.position = "right",
legend.background = element_rect(fill = "lightblue"))
plot.windrose.2.anim = plot.windrose.2 + transition_time(month)
plot.windrose.2.anim
Lastly, objective 3 is to look for any correlation between wind
direction and rainfall.
This will require creating a new data
frame from the raw data and applying some analysis to it.
For an
easier to view output, we will put the wind direction measurements into
bins that equate to points on a compass.
We’ll also put the
rainfall amount into bins.
df.weather <- rawdata.3 %>%
filter(site_id == 8 & measure %in% c(1, 2, 3, 4)) %>%
distinct() %>%
pivot_wider(.,
names_from = "measure",
values_from = "value",
values_fill = 0) %>%
select(-1,-2) %>%
mutate(month = as.numeric(month(as_dte_tme))) %>%
mutate(month_name = month(as_dte_tme, abbr = TRUE, label = TRUE)) %>%
mutate(dir_bin = cut(`4`, breaks = c(0,22.5,67.5,112.5,157.5,202.5,247.5,292.5,337.5,360),
labels = c("N","NE","E","SE","S","SW","W","NW","N"))) %>%
mutate(rain_bin = case_when(
`1` == 0 ~ "0",
`1` > 0 & `1` <= 5 ~ "0-05",
`1` > 5 & `1` <= 10 ~ "05-10",
`1` > 10 & `1` <= 15 ~ "10-15",
`1` > 15 & `1` <= 20 ~ "15-20",
`1` > 20 & `1` <= 25 ~ "20-25",
`1` > 25 & `1` <= 30 ~ "25-30",
`1` > 30 & `1` <= 35 ~ "30-35",
`1` > 35 & `1` <= 40 ~ "35-40",
`1` > 40 & `1` <= 45 ~ "40-45",
`1` > 45 & `1` <= 50 ~ "45-50",
`1` > 50 & `1` <= 55 ~ "50-55",
`1` > 55 & `1` <= 60 ~ "55-60",
`1` > 60 ~ "60+",
TRUE ~ NA_character_
))
colnames(df.weather) <- c("dte_tme","dir","spd","tmp","rain","month","month_name","dir_bin","rain_bin")
A quick plot of the data to look for any patterns.
ggplot(df.weather,
aes(x=`dir`, y=`spd`, position=`month_name`)) +
geom_point(aes(size = `rain`))
Lets use the bins instead which should be easier to interpret having the
points of the compass as labels. We’ll look at the frequency of rainfall
amount per day and the prevailing wind direction.
plotrain <- ggplot(df.weather, aes(dir_bin, rain_bin)) +
geom_bin2d(breaks = seq(-0.25, 8.25, 0.5)) +
stat_bin2d(geom = "text", aes(label = after_stat(count)),size=2, breaks = seq(-0.25, 8.25, 0.5)) +
scale_fill_gradient(low="white", high="red") +
labs(title = "Wind Direction and Frequency of Rainfall volume: Kapoaiaia at Cape Egmont",
subtitle = "",
caption = "Data sourced from trc.govt.nz",
x = "Wind Direction",
y = "Rainfall (mm/24h)",
fill = "Frequency") +
theme(panel.background = element_rect(fill = 'lightgrey', color = 'darkblue'),
panel.grid.minor = element_line(linetype = 'dotted'),
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
axis.title = element_text(size = 8),
legend.text = element_text(size = 6),
legend.title = element_text(size = 8),
legend.position = "right",
legend.background = element_rect(fill = "lightgrey"))
plotrain
This appears to align with my own observations living here, being
that;
It was however surprising to see medium-higher rainfall days from the SE and S also. Further analysis might give some insight where the resolution of the data is taken below daily measurements down to hourly.
Lastly, for interest we’ll look at the above data on a monthly basis.
plotrainmonth <- plotrain +
facet_wrap(~month_name, ncol = 4)
plotrainmonth