Check whether sufficient sites have net radiation data.
# Load FLUXNET2015 data (netrad, ppfd, temp; timescale = "d") for all sites in
# ingestr::siteinfo_fluxnet2015. The ingested object is cached as an RDS file
# so that the ingest only has to run when the cache does not exist yet.
filnam <- "./data/df_fluxnet.rds"
if (!file.exists(filnam)) {
  df_fluxnet <- ingestr::ingest(
    siteinfo = ingestr::siteinfo_fluxnet2015,
    source = "fluxnet",
    getvars = list(
      netrad = "NETRAD",
      ppfd = "SW_IN_F",
      temp = "TA_F_DAY"
    ),
    dir = "~/data/FLUXNET-2015_Tier1/20191024/DD/",
    timescale = "d",
    settings = list(
      getswc = TRUE,
      threshold_NETRAD = 0.8
    )
  )

  # Create the cache directory if needed, so the result of the ingest is not
  # lost to a failing saveRDS().
  dir.create(dirname(filnam), recursive = TRUE, showWarnings = FALSE)
  saveRDS(df_fluxnet, file = filnam)
} else {
  df_fluxnet <- readRDS(filnam)
}
Missing data across all sites.
# Visualise missing net radiation values pooled over all sites.
# df_fluxnet is grouped by `sitename`; without ungroup(), select() re-adds the
# grouping column, which then appears as an extra, fully present column and
# dilutes the overall missing percentage reported by vis_miss().
df_fluxnet |>
  tidyr::unnest(data) |>
  ungroup() |>
  select(netrad) |>
  visdat::vis_miss(warn_large_data = FALSE, cluster = FALSE)
Get fraction of missing data per site.
#' Fraction of missing values in one column of a data frame
#'
#' @param df A data frame.
#' @param varnam Name of the column to check, given as a string.
#' @return A single number: the share of `NA` entries in column `varnam`
#'   (`NaN` if `df` has zero rows).
calc_f_missing <- function(df, varnam) {
  # Fail loudly on an unknown column name instead of returning NaN.
  if (is.character(varnam) && !all(varnam %in% names(df))) {
    stop("Column `", varnam, "` not found in `df`.", call. = FALSE)
  }

  # The mean of a logical vector is the share of TRUE values.
  mean(is.na(df[[varnam]]))
}
# Add the per-site fraction of missing net radiation as column `f_missing`,
# computed from each site's entry in the `data` list column.
df_missing <- mutate(
  df_fluxnet,
  f_missing = purrr::map_dbl(
    data,
    \(df_site) calc_f_missing(df_site, "netrad")
  )
)
Distribution of missing data.
# Histogram of the per-site fraction of missing net radiation.
# after_stat(count) replaces the `..count..` notation, which was deprecated in
# ggplot2 3.4.0.
df_missing |>
  ggplot(aes(x = f_missing, y = after_stat(count))) +
  geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Available sites with at most 10% missing data.
# Candidate sites: keep sites with at most 10% missing net radiation, attach
# the site meta information from ingestr, drop sites with classid "CRO", and
# add nyears as year_end - year_start + 1.
df_sites <- df_missing |>
  filter(f_missing <= 0.1) |>
  select(-data) |>
  arrange(desc(f_missing)) |>
  left_join(ingestr::siteinfo_fluxnet2015, by = "sitename") |>
  filter(classid != "CRO") |>
  mutate(
    nyears = as.numeric(year_end) - as.numeric(year_start) + 1
  )
Selecting 15 sites (to have enough to start with):
# Restrict the candidate sites to a hand-picked subset.
# NOTE(review): the original list named "FR-Pue" and "US-NR1" twice, so it held
# 17 entries but only 15 distinct sites. If two other sites were intended in
# place of the duplicates, add them here.
df_sites <- df_sites |>
  filter(
    sitename %in% c(
      "FR-LBr", "FR-Pue", "US-Ton", "US-NR1", "NL-Loo",
      "US-MMS", "US-Wkg", "AU-DaS", "DE-Tha", "GF-Guy",
      "US-UMB", "AU-Tum", "US-Me2", "DE-Hai", "US-SRM"
    )
  )
Fill missing data using KNN.
# Gap-fill net radiation for the selected sites only: fill_missing_netrad()
# (defined elsewhere) is applied to each element of the `data` list column.
df_fluxnet_filled <- mutate(
  filter(df_fluxnet, sitename %in% df_sites$sitename),
  data = purrr::map(data, fill_missing_netrad)
)
With filled values.
# Compare filled (red) and original (black) net radiation for site FR-Pue.
# Original values are drawn last, so red points stay visible only where no
# original value is plotted on top of them.
# unnest() is given `cols` explicitly; calling it without `cols` is deprecated.
df_fluxnet_filled |>
  filter(sitename == "FR-Pue") |>
  tidyr::unnest(cols = data) |>
  ggplot() +
  geom_point(aes(date, netrad_filled), color = "red") +
  geom_point(aes(date, netrad))
#> Warning: Removed 198 rows containing missing values (`geom_point()`).
Table of missing data
# Per-site fraction of missing net radiation, rendered as a table.
knitr::kable(
  select(df_missing, sitename, f_missing)
)
sitename | f_missing |
---|---|
AR-SLu | 0.5908676 |
AR-Vir | 0.4363014 |
AT-Neu | 0.0229141 |
AU-Ade | 0.4922374 |
AU-ASM | 0.1712329 |
AU-Cpr | 0.1457534 |
AU-Cum | 0.2666667 |
AU-DaP | 0.1749511 |
AU-DaS | 0.0324853 |
AU-Dry | 0.2986301 |
AU-Emr | 0.1771689 |
AU-Fog | 0.1753425 |
AU-Gin | 0.2979452 |
AU-GWW | 0.4246575 |
AU-How | 0.4448141 |
AU-Lox | 0.5986301 |
AU-RDF | 0.4410959 |
AU-Rig | 0.1315068 |
AU-Rob | 0.0082192 |
AU-Stp | 0.1776908 |
AU-TTE | 0.2712329 |
AU-Tum | 0.0551859 |
AU-Wac | 0.2116438 |
AU-Whr | 0.2321918 |
AU-Wom | 0.0949772 |
AU-Ync | 0.2767123 |
BE-Bra | 0.1900505 |
BE-Lon | 0.0784558 |
BE-Vie | 0.1093006 |
BR-Sa3 | 0.4208219 |
CA-Man | 0.1663927 |
CA-NS1 | 0.3747945 |
CA-NS2 | 0.2668493 |
CA-NS3 | 0.2394521 |
CA-NS4 | 0.2705479 |
CA-NS5 | 0.2032877 |
CA-NS6 | 0.2043836 |
CA-NS7 | 0.1993151 |
CA-Qfo | 0.0698630 |
CA-SF1 | 0.1650685 |
CA-SF2 | 0.3764384 |
CA-SF3 | 0.2447489 |
CH-Cha | 0.3016438 |
CH-Dav | 0.1885845 |
CH-Fru | 0.3010959 |
CH-Lae | 1.0000000 |
CH-Oe1 | 0.2046967 |
CH-Oe2 | 1.0000000 |
CN-Cha | 0.0000000 |
CN-Cng | 0.2260274 |
CN-Dan | 0.0000000 |
CN-Din | 0.0000000 |
CN-Du2 | 0.1223744 |
CN-Ha2 | 0.0000000 |
CN-HaM | 0.0657534 |
CN-Qia | 0.0000000 |
CN-Sw2 | 0.6347032 |
CZ-BK1 | 0.5194521 |
CZ-BK2 | 0.6785388 |
CZ-wet | 0.1296804 |
DE-Akm | 0.2465753 |
DE-Geb | 0.0043053 |
DE-Gri | 0.0174346 |
DE-Hai | 0.0179136 |
DE-Kli | 0.2587796 |
DE-Lkb | 0.2120548 |
DE-Obe | 0.0414873 |
DE-RuR | 0.1328767 |
DE-RuS | 0.3534247 |
DE-Seh | 0.1801370 |
DE-SfN | 0.1963470 |
DE-Spw | 0.1523288 |
DE-Tha | 0.0184571 |
DK-Fou | 0.1835616 |
DK-NuF | 0.6348337 |
DK-Sor | 0.5159337 |
DK-ZaF | 0.9500000 |
DK-ZaH | 0.7391781 |
ES-LgS | 0.2803653 |
ES-Ln2 | 1.0000000 |
FI-Hyy | 0.2092286 |
FI-Jok | 0.1931507 |
FI-Lom | 0.0347032 |
FI-Sod | 0.3590998 |
FR-Fon | 0.0189041 |
FR-Gri | 0.0605479 |
FR-LBr | 0.0537408 |
FR-Pue | 0.0361644 |
GF-Guy | 0.0141968 |
IT-BCi | 0.2747198 |
IT-CA1 | 0.6616438 |
IT-CA2 | 0.6705479 |
IT-CA3 | 0.3815068 |
IT-Col | 0.3387167 |
IT-Cp2 | 0.4337900 |
IT-Cpz | 0.2421496 |
IT-Isp | 0.0000000 |
IT-La2 | 0.5287671 |
IT-Lav | 0.0481735 |
IT-MBo | 0.0674969 |
IT-Noe | 0.1599004 |
IT-PT1 | 0.1178082 |
IT-Ren | 0.1398973 |
IT-Ro1 | 0.3226788 |
IT-Ro2 | 0.2931507 |
IT-SR2 | 0.0000000 |
IT-SRo | 0.4620352 |
IT-Tor | 0.1792564 |
JP-MBF | 1.0000000 |
JP-SMF | 1.0000000 |
NL-Hor | 0.1260274 |
NL-Loo | 0.0561644 |
NO-Adv | 0.6815068 |
NO-Blv | 1.0000000 |
RU-Che | 0.6486301 |
RU-Cok | 0.6641553 |
RU-Fyo | 0.1182917 |
RU-Ha1 | 0.4520548 |
SD-Dem | 0.1654795 |
SN-Dhr | 0.2390411 |
US-AR1 | 0.1157534 |
US-AR2 | 0.2232877 |
US-ARb | 0.1767123 |
US-ARc | 0.1739726 |
US-ARM | 0.0865753 |
US-Blo | 0.2313823 |
US-Cop | 0.4160470 |
US-GBT | 0.2106164 |
US-GLE | 0.1185554 |
US-Ha1 | 0.3346202 |
US-KS2 | 0.0698630 |
US-Los | 0.4213699 |
US-Me1 | 0.5465753 |
US-Me2 | 0.0522655 |
US-Me6 | 0.1616438 |
US-MMS | 0.0594178 |
US-Myb | 0.2482192 |
US-Ne1 | 0.0817703 |
US-Ne2 | 0.0788198 |
US-Ne3 | 0.0802950 |
US-NR1 | 0.0547945 |
US-ORv | 0.1205479 |
US-PFa | 1.0000000 |
US-Prr | 0.2609589 |
US-SRG | 0.0626223 |
US-SRM | 0.0204234 |
US-Syv | 0.3982387 |
US-Ton | 0.0585127 |
US-Tw1 | 0.1771689 |
US-Tw2 | 0.5876712 |
US-Tw3 | 0.2726027 |
US-Tw4 | 0.4589041 |
US-Twt | 0.1908676 |
US-UMB | 0.0325114 |
US-UMd | 0.1191781 |
US-Var | 0.1693151 |
US-WCr | 0.3688356 |
US-Whs | 0.0794521 |
US-Wi0 | 0.4821918 |
US-Wi3 | 0.5388128 |
US-Wi4 | 0.5780822 |
US-Wi6 | 0.7013699 |
US-Wi9 | 0.6013699 |
US-Wkg | 0.0338730 |
ZA-Kru | 0.5155666 |
ZM-Mon | 0.8126027 |