Raw station distribution by length of record (colour coded)
# Grids colour coded by length of record for each decade
library(RNetCDF)
library(lubridate)
library(maps)
library(plot3D)
# Import netcdf of total stations for each decade
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/ALL/REGEN_V1.1_ALLStns_1950-2013_1deg_totalDaysWithAtleast1StnPerDecade.nc")
record_length_per_decade <- var.get.nc(nc, "s")
lon <- var.get.nc(nc, "lon")
lat <- var.get.nc(nc, "lat")
close.nc(nc)
# Load a land sea mask
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/ALL/Global_1deg_LandSeamask.nc")
LSmask <- var.get.nc(nc, "p")
close.nc(nc)
# How many days in each decade
dates <- seq(ymd("19500101"), ymd("20131231"),1)
ydates <- year(dates)
days_per_decade <- sapply(seq(1960, 2020, 10), FUN = function(n) {length(which(ydates >= n - 10 & ydates < n))})
# Convert record length into percentage of days in decade
percentage_per_decade <- array(dim = dim(record_length_per_decade))
for (i in 1:7) {
percentage_per_decade[,,i] <- record_length_per_decade[,,i] / days_per_decade[i] * 100
percentage_per_decade[,,i][is.na(LSmask)] <- NA
}
plot.record_length_per_decade.panel <- function() {
layout(mat = matrix(c(1:10,10,10), ncol = 3, byrow = T), heights = c(1, 1, 1, 0.2))
brks <- seq(0, 100, 20)
cols <- c('#969696', '#ffffcc','#a1dab4','#41b6c4','#2c7fb8','#253494')
labels <- c("1950-1959", "1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2013")
par(mar = c(0.5,1,1.5,1) + 0.1, oma = c(1,1,1,1), bg = NA)
for (decade in 1:6) {
image(lon, lat, percentage_per_decade[,,decade], breaks = brks, col = cols[2:length(cols)], xaxt = "n", yaxt = "n",
xlab = "", ylab = "", ylim = c(-60, 90))
greys <- percentage_per_decade[,,decade]
greys[which(greys > 0)] <- NA
image(lon, lat, greys, col = cols[1], add = T)
map("world", interior = F, add = T)
title(labels[decade], cex.main = 2)
}
plot.new()
image(lon, lat, percentage_per_decade[,,decade], breaks = brks, col = cols[2:length(cols)], xaxt = "n", yaxt = "n",
xlab = "", ylab = "", ylim = c(-60, 90))
greys <- percentage_per_decade[,,decade]
greys[which(greys > 0)] <- NA
image(lon, lat, greys, col = cols[1], add = T)
map("world", interior = F, add = T)
title(labels[7], cex.main = 2)
plot.new()
par(mar = c(1,5,0,5) + 0.1)
colkey(col = cols[2:length(cols)], breaks = brks, side = 1, cex.axis = 2, width = 8)
}
plot.record_length_per_decade.panel()
Now the same for the 40yr long term data
# Grids colour coded by length of record for each decade
library(RNetCDF)
library(lubridate)
library(maps)
library(plot3D)
# Import netcdf of total stations for each decade
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/FORTY/REGEN_V1.1_LongTermStns_1950-2013_1deg_totalDaysWithAtleast1StnPerDecade.nc")
record_length_per_decade <- var.get.nc(nc, "s")
lon <- var.get.nc(nc, "lon")
lat <- var.get.nc(nc, "lat")
close.nc(nc)
# I don't know why there must be
record_length_per_decade[which(record_length_per_decade == 3653)] <- 3652
# Load a land sea mask
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.0/ALL/CCRC_V1.0_1950-2013_1deg_LandSeamask.nc")
LSmask <- var.get.nc(nc, "p")
close.nc(nc)
# How many days in each decade
dates <- seq(ymd("19500101"), ymd("20131231"),1)
ydates <- year(dates)
days_per_decade <- sapply(seq(1960, 2020, 10), FUN = function(n) {length(which(ydates >= n - 10 & ydates < n))})
# Convert record length into percentage of days in decade
percentage_per_decade <- array(dim = dim(record_length_per_decade))
for (i in 1:7) {
percentage_per_decade[,,i] <- record_length_per_decade[,,i] / days_per_decade[i] * 100
percentage_per_decade[,,i][is.na(LSmask)] <- NA
}
plot.record_length_per_decade.panel <- function() {
layout(mat = matrix(c(1:10,10,10), ncol = 3, byrow = T), heights = c(1, 1, 1, 0.2))
brks <- seq(0, 100, 20)
cols <- c('#969696', '#ffffcc','#a1dab4','#41b6c4','#2c7fb8','#253494')
labels <- c("1950-1959", "1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2013")
par(mar = c(0.5,1,1.5,1) + 0.1, oma = c(1,1,1,1), bg = NA)
for (decade in 1:6) {
image(lon, lat, percentage_per_decade[,,decade], breaks = brks, col = cols[2:length(cols)], xaxt = "n", yaxt = "n",
xlab = "", ylab = "", ylim = c(-60, 90))
greys <- percentage_per_decade[,,decade]
greys[which(greys > 0)] <- NA
image(lon, lat, greys, col = cols[1], add = T)
map("world", interior = F, add = T)
title(labels[decade], cex.main = 2)
}
plot.new()
image(lon, lat, percentage_per_decade[,,decade], breaks = brks, col = cols[2:length(cols)], xaxt = "n", yaxt = "n",
xlab = "", ylab = "", ylim = c(-60, 90))
greys <- percentage_per_decade[,,decade]
greys[which(greys > 0)] <- NA
image(lon, lat, greys, col = cols[1], add = T)
map("world", interior = F, add = T)
title(labels[7], cex.main = 2)
plot.new()
par(mar = c(1,5,0,5) + 0.1)
colkey(col = cols[2:length(cols)], breaks = brks, side = 1, cex.axis = 2, width = 8)
}
plot.record_length_per_decade.panel()
REGEN40yr
Redo the above figure but highlight with colours not just 80% grids but also 0,20%,30% and so on
library(RNetCDF)
library(lubridate)
library(maps)
library(plot3D)
# Now the same for GPCC
# Import netcdf of total stations for each decade
nc <- open.nc("/srv/ccrc/data45/z3289452/DailyPRCP_netCDFs/GPCC_FDD/full_data_daily_1988-2013_totaldayswithatleast1stationperdecade.nc")
GPCC.record_length_per_decade <- var.get.nc(nc, "s")
lon <- var.get.nc(nc, "lon")
lat <- var.get.nc(nc, "lat")
close.nc(nc)
# Load a land sea mask
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.0/ALL/CCRC_V1.0_1950-2013_1deg_mask.nc")
LSmask <- var.get.nc(nc, "p")
close.nc(nc)
# How many days in each decade
dates <- seq(ymd("19500101"), ymd("20131231"),1)
ydates <- year(dates)
days_per_decade <- sapply(seq(1960, 2020, 10), FUN = function(n) {length(which(ydates >= n - 10 & ydates < n))})
# Convert record length into percentage of days in decade
GPCC.percentage_per_decade <- array(dim = dim(record_length_per_decade))
for (i in 1:3) {
GPCC.percentage_per_decade[,,i] <- GPCC.record_length_per_decade[,,i] / days_per_decade[i] * 100
GPCC.percentage_per_decade[,,i][is.na(LSmask)] <- NA
}
plot.record_length_per_decade.panel <- function() {
layout(mat = matrix(c(1:4,4,4), ncol = 3, byrow = T), heights = c(1, 0.2))
brks <- seq(0, 100, 20)
cols <- c('#969696', '#ffffcc','#a1dab4','#41b6c4','#2c7fb8','#253494')
labels <- c("1990-1999", "2000-2009", "2010-2013")
par(mar = c(0.5,1,1.5,1) + 0.1, oma = c(1,1,1,1), bg = NA)
for (decade in 1:3) {
image(lon, lat, GPCC.percentage_per_decade[,,decade], breaks = brks, col = cols[2:length(cols)], xaxt = "n", yaxt = "n",
xlab = "", ylab = "", ylim = c(-60, 90))
greys <- GPCC.percentage_per_decade[,,decade]
greys[which(greys > 0)] <- NA
image(lon, lat, greys, col = cols[1], add = T)
map("world", interior = F, add = T)
title(labels[decade], cex.main = 2)
}
par(mar = c(1,5,0,5) + 0.1)
colkey(col = cols[2:length(cols)], breaks = brks, side = 1, cex.axis = 2, width = 8)
}
plot.record_length_per_decade.panel()
Flagrates and missing months
# Had to delete the first line of some summary files
# Print first line of each file to find out which files
cd /srv/ccrc/data11/z3289452/CCRCGlobalGriddedDailyPrecip/Metadata/VERSION1.0_data/summaries
for l in ALL TWENTY; do for r in Africa EstAsia Europe NthAmerica Oceania Russia SthAmerica SthAsia; do for f in BOTH CONCATENATED FLAGGED MISSING_MONTH; do echo $l $r $f:; head -n 1 $l/$r/"$f"_summary.txt; done; done; done
# Now delete the first line of the files
for l in ALL TWENTY; do for r in EstAsia Europe NthAmerica Russia SthAsia; do for f in BOTH CONCATENATED FLAGGED MISSING_MONTH; do echo $l $r $f:; sed -i.bak -e '1d' $l/$r/"$f"_summary.txt; done; done; done
library(lubridate)
# Read in summaries
path <- "/srv/ccrc/data11/z3289452/CCRCGlobalGriddedDailyPrecip/Metadata/VERSION1.1_data/summaries/"
len_criteria <- list('ALL', 'FORTY')
data_type <- list('CONCATENATED', 'FLAGGED', 'MISSING_MONTH', 'BOTH')
region <- list('NthAmerica', 'SthAmerica', 'Africa', 'Europe', 'Russia', 'SthAsia', 'EstAsia', 'Oceania')
summaries <- list()
for (l in 1:length(len_criteria)) {
summaries.tmp2 <- list()
for (d in 1:length(data_type)) {
summaries.tmp1 <- list()
for (r in 1:length(region)) {
summaries.tmp1[[r]] <- read.table(paste0(path, len_criteria[l], '/',
region[r], '/',
data_type[d], '_summary.txt'), header = F)
names(summaries.tmp1)[r] <- region[r]
summaries.tmp1[[r]][,1] <- ymd(summaries.tmp1[[r]][,1]) #Convert into dates
names(summaries.tmp1[[r]]) <- c("Date", region[r])
}
summaries.tmp2[[d]] <- summaries.tmp1
names(summaries.tmp2)[d] <- data_type[d]
}
summaries[[l]] <- summaries.tmp2
names(summaries)[l] <- len_criteria[l]
}
rm(summaries.tmp1, summaries.tmp2)
# yearly sumamries of flagged and missing month data
dates <- seq(ymd("19500101"), ymd("20131231"), by = 1)
# global total by year
library(dplyr)
# convert into data frame
# concatenated
concatenated <- summaries$ALL$CONCATENATED[[1]]
for (r in 2:length(region)) {
concatenated <- merge(concatenated, summaries$ALL$CONCATENATED[[r]], by = "Date")
}
# sum globally
concatenated$Global <- concatenated %>% select(-Date) %>% rowSums()
# aggregate by year
concatenated <- concatenated %>% mutate(Year = year(Date))
Annualconcatenated <- concatenated %>% group_by(Year) %>% summarise(Concatenated = sum(Global),
Concat.NthAm = sum(NthAmerica),
Concat.SthAm = sum(SthAmerica),
Concat.Afr = sum(Africa),
Concat.Eur = sum(Europe),
Concat.Rus = sum(Russia),
Concat.SthAs = sum(SthAsia),
Concat.EstAs = sum(EstAsia),
Concat.Oce = sum(Oceania))
# convert into data frame
# Flagged
flagged <- summaries$ALL$FLAGGED[[1]]
for (r in 2:length(region)) {
flagged <- merge(flagged, summaries$ALL$FLAGGED[[r]], by = "Date")
}
# sum globally
flagged$Global <- flagged %>% select(-Date) %>% rowSums()
# aggregate by year
flagged <- flagged %>% mutate(Year = year(Date))
Annualflagged <- flagged %>% group_by(Year) %>% summarise(Flagged = sum(Global),
Flagged.NthAm = sum(NthAmerica),
Flagged.SthAm = sum(SthAmerica),
Flagged.Afr = sum(Africa),
Flagged.Eur = sum(Europe),
Flagged.Rus = sum(Russia),
Flagged.SthAs = sum(SthAsia),
Flagged.EstAs = sum(EstAsia),
Flagged.Oce = sum(Oceania))
# convert into data frame
# Missing Month
missingmonth <- summaries$ALL$MISSING_MONTH[[1]]
for (r in 2:length(region)) {
missingmonth <- merge(missingmonth, summaries$ALL$MISSING_MONTH[[r]], by = "Date")
}
# sum globally
missingmonth$Global <- missingmonth %>% select(-Date) %>% rowSums()
# aggregate by year
missingmonth <- missingmonth %>% mutate(Year = year(Date))
Annualmissingmonth <- missingmonth %>% group_by(Year) %>% summarise(Missingmonth = sum(Global),
Missmon.NthAm = sum(NthAmerica),
Missmon.SthAm = sum(SthAmerica),
Missmon.Afr = sum(Africa),
Missmon.Eur = sum(Europe),
Missmon.Rus = sum(Russia),
Missmon.SthAs = sum(SthAsia),
Missmon.EstAs = sum(EstAsia),
Missmon.Oce = sum(Oceania))
# convert into data frame
# Both missing month and flagged
both <- summaries$ALL$BOTH[[1]]
for (r in 2:length(region)) {
both <- merge(both, summaries$ALL$BOTH[[r]], by = "Date")
}
# sum globally
both$Global <- both %>% select(-Date) %>% rowSums()
# aggregate by year
both <- both %>% mutate(Year = year(Date))
Annualboth <- both %>% group_by(Year) %>% summarise(Both = sum(Global),
Both.NthAm = sum(NthAmerica),
Both.SthAm = sum(SthAmerica),
Both.Afr = sum(Africa),
Both.Eur = sum(Europe),
Both.Rus = sum(Russia),
Both.SthAs = sum(SthAsia),
Both.EstAs = sum(EstAsia),
Both.Oce = sum(Oceania))
# Now combine all dataframes
AnnualData <- Reduce(function(...) merge(..., by = "Year"),
list(Annualconcatenated, Annualflagged, Annualmissingmonth, Annualboth))
AnnualData <- AnnualData %>% mutate(FlaggedPercentage = (Flagged + Both)/Concatenated * 100,
MissingPercentage = (Missingmonth + Both)/Concatenated * 100,
Flg.NthAm = (Flagged.NthAm + Both.NthAm)/Concat.NthAm * 100,
MM.NthAm = (Missmon.NthAm + Both.NthAm)/Concat.NthAm * 100,
Flg.SthAm = (Flagged.SthAm + Both.SthAm)/Concat.SthAm * 100,
MM.SthAm = (Missmon.SthAm + Both.SthAm)/Concat.SthAm * 100,
Flg.Afr = (Flagged.Afr + Both.Afr)/Concat.Afr * 100,
MM.Afr = (Missmon.Afr + Both.Afr)/Concat.Afr * 100,
Flg.Eur = (Flagged.Eur + Both.Eur)/Concat.Eur * 100,
MM.Eur = (Missmon.Eur + Both.Eur)/Concat.Eur * 100,
Flg.Rus = (Flagged.Rus + Both.Rus)/Concat.Rus * 100,
MM.Rus = (Missmon.Rus + Both.Rus)/Concat.Rus * 100,
Flg.SthAs = (Flagged.SthAs + Both.SthAs)/Concat.SthAs * 100,
MM.SthAs = (Missmon.SthAs + Both.SthAs)/Concat.SthAs * 100,
Flg.EstAs = (Flagged.EstAs + Both.EstAs)/Concat.EstAs * 100,
MM.EstAs = (Missmon.EstAs + Both.EstAs)/Concat.EstAs * 100,
Flg.Oce = (Flagged.Oce + Both.Oce)/Concat.Oce * 100,
MM.Oce = (Missmon.Oce + Both.Oce)/Concat.Oce * 100)
par(mar = c(4.5,4.5,0.5,0.5) + 0.1, bg = NA)
cols <- paste0(c('#000000','#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00','#ffff33','#a65628','#f781bf'),
"dd")
plot(AnnualData %>% select(Year, FlaggedPercentage) %>% filter(Year <= 2013),
type = "l", ylab = "Percentage of flagged records per year", cex.lab = 2, cex.axis = 2,
lwd = 6, col = cols[1])
par(mar = c(8,4.5,0.5,0.5) + 0.1, bg = NA, xpd = T)
plot(AnnualData %>% select(Year, MissingPercentage) %>% filter(Year <= 2013),
type = "l", ylab = "Percentage of records with missing month", cex.lab = 2, cex.axis = 2,
lwd = 6, ylim = range(AnnualData %>% filter(Year <= 2013) %>%
select(MissingPercentage,
grep("MM.*", names(AnnualData), value = T)),
na.rm = T))
for (i in 1:8) {
lines(AnnualData[which(AnnualData$Year <= 2013),1], AnnualData[which(AnnualData$Year <= 2013),2*(i - 1) + 41],
lwd = 6, col = cols[i + 1])
}
legend("bottom", horiz = T, legend = c("Global", "North America", "South America", "Africa"),
lwd = 6, col = cols[1:4], cex = 1.5, bty = "n", y.intersp = -8)
legend("bottom", horiz = T, legend = c("Europe", "Russia", "South Asia", "East Asia", "Oceania"),
lwd = 6, col = cols[5:9], cex = 1.5, bty = "n", y.intersp = -10)
# World map with regions
library(plot3D)
# create an empty plot
par(mar = c(0,0,0,0) + 0.1, bg = NA)
plot(0,xaxt='n',yaxt='n',bty='n',pch='',ylab='',xlab='',xlim = c(-180,180), ylim = c(-60,90))
map("world", interior = F, add = T)
mtext("North America", side = 3, line = -4, adj = 0.07 , cex = 1.5)
rect(-180,13,-34,90)
mtext("South America", side = 3, line = -19, adj = 0.07, cex = 1.5)
rect(-180,-90,-34,13)
mtext("Europe", side = 3, line = -4, adj = 0.48, cex = 1.5)
rect(-34,38,46,90)
mtext("Africa", side = 3, line = -13.5, adj = 0.45, cex = 1.5)
rect(-34,-90,60,38)
mtext("Russia", side = 3, line = -4, adj = 0.69, cex = 1.5)
rect(46,38,180,90)
mtext("South", side = 3, line = -13.5, adj = 0.7, cex = 1.5)
mtext("Asia", side = 3, line = -14.5, adj = 0.69, cex = 1.5)
rect(60,5,98,38)
mtext("East", side = 3, line = -13.5, adj = 0.95, cex = 1.5)
mtext("Asia", side = 3, line = -14.5, adj = 0.95, cex = 1.5)
rect(98,5,180,38)
mtext("Oceania", side = 3, line = -21, adj = 0.73, cex = 1.5)
rect(60,-90,180,5)
Number of stations per grid cell
Kriging Error
library(RNetCDF)
library(lubridate)
library(plot3D) # for colkey()
library(fields) # for map()
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/ALL/REGEN_V1.1_ALLStns_1950-2013_1deg_timavg.nc")
StdDev.Avg <- var.get.nc(nc, "sd")
KrigError.Avg <- var.get.nc(nc, "ek")
lon <- var.get.nc(nc, "lon")
lat <- var.get.nc(nc, "lat")
close.nc(nc)
quantile(KrigError.Avg, probs = seq(0, 1, 0.05), na.rm = T)
brks <- seq(0, 100, length.out = 6)
zbrks <- brks
zbrks[1] <- min(brks, KrigError.Avg, na.rm = T)
zbrks[length(brks)] <- max(brks, KrigError.Avg, na.rm = T)
lbrks <- ifelse(brks == zbrks, paste0(brks, "%"), "")
cols <- c('#feebe2','#fbb4b9','#f768a1','#c51b8a','#7a0177')
par(mar = c(0,0,0,0) + 0.1, bg = NA)
layout(mat = matrix(c(1,2), nrow = 2, byrow = T), heights = c(1,0.15))
image(lon, lat, KrigError.Avg, xlab = "", ylab = "", breaks = zbrks,
col = cols, xaxt = "n", yaxt = "n", cex.lab = 1.5, cex.axis = 1.5, ylim = c(-60,90))
map("world", interior = F, add = T)
#title(xlab = "CCRC Averaged Kriging Error 1950-2013 (%)", line = 1, cex.lab = 1.5)
par(mar = c(0,8,0,8) + 0.1)
colkey(col = cols, side = 1, clim = range(brks), width = 5, at = brks,
labels = paste(lbrks), cex.axis = 1.5)
title("REGEN V1.1 Averaged Kriging Error 1950-2013", line = -1.5, cex.main = 1.5)
# Do the same for long term 40yr data
library(RNetCDF)
library(lubridate)
library(plot3D) # for colkey()
library(fields) # for map()
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/FORTY/REGEN_V1.1_LongTermStns_1950-2013_1deg_timavg.nc")
StdDev.Avg <- var.get.nc(nc, "sd")
KrigError.Avg <- var.get.nc(nc, "ek")
lon <- var.get.nc(nc, "lon")
lat <- var.get.nc(nc, "lat")
close.nc(nc)
quantile(KrigError.Avg, probs = seq(0, 1, 0.05), na.rm = T)
brks <- seq(0, 100, length.out = 6)
zbrks <- brks
zbrks[1] <- min(brks, KrigError.Avg, na.rm = T)
zbrks[length(brks)] <- max(brks, KrigError.Avg, na.rm = T)
lbrks <- ifelse(brks == zbrks, paste0(brks, "%"), "")
cols <- c('#feebe2','#fbb4b9','#f768a1','#c51b8a','#7a0177')
par(mar = c(0,0,0,0) + 0.1, bg = NA)
layout(mat = matrix(c(1,2), nrow = 2, byrow = T), heights = c(1,0.15))
image(lon, lat, KrigError.Avg, xlab = "", ylab = "", breaks = zbrks,
col = cols, xaxt = "n", yaxt = "n", cex.lab = 1.5, cex.axis = 1.5, ylim = c(-60,90))
map("world", interior = F, add = T)
#title(xlab = "CCRC Averaged Kriging Error 1950-2013 (%)", line = 1, cex.lab = 1.5)
par(mar = c(0,8,0,8) + 0.1)
colkey(col = cols, side = 1, clim = range(brks), width = 5, at = brks,
labels = paste(lbrks), cex.axis = 1.5)
title("REGEN V1.1 Long Term 40yr Averaged Kriging Error 1950-2013", line = -1.5, cex.main = 1.5)
Coefficient of Variation
library(RNetCDF)
library(lubridate)
library(plot3D) # for colkey()
library(fields) # for map()
# Coefficient of Variation
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/ALL/REGEN_V1.1_ALLStns_1950-2013_1deg_CoefofVariation.nc")
CV.Avg <- var.get.nc(nc, "sd")
lon <- var.get.nc(nc, "lon")
lat <- var.get.nc(nc, "lat")
close.nc(nc)
quantile(CV.Avg, probs = seq(0, 1, 0.05), na.rm = T)
brks <- seq(0.25, 1.5, length.out = 6)
zbrks <- brks
zbrks[1] <- min(brks, CV.Avg, na.rm = T)
zbrks[length(brks)] <- max(brks, CV.Avg, na.rm = T)
lbrks <- ifelse(brks == zbrks, brks, "")
lbrks <- ifelse(brks == zbrks, lbrks, "")
cols <- c('#f1eef6','#d7b5d8','#df65b0','#dd1c77','#980043')
par(mar = c(0,0,0,0) + 0.1, bg = NA)
layout(mat = matrix(c(1,2), nrow = 2, byrow = T), heights = c(1,0.15))
image(lon, lat, CV.Avg, xlab = "", ylab = "", breaks = zbrks,
col = cols, xaxt = "n", yaxt = "n", cex.lab = 1.5, cex.axis = 1.5, ylim = c(-60,90))
map("world", interior = F, add = T)
par(mar = c(0,8,0,8) + 0.1)
colkey(col = cols, side = 1, clim = range(brks), width = 5, at = lbrks,
labels = paste(lbrks), cex.axis = 1.5)
title("Coefficient of variation (Std dev / Ann Clim) 1950-2013", line = -1.5, cex.main = 1.5)
# Now the long term 40yr data
library(RNetCDF)
library(lubridate)
library(plot3D) # for colkey()
library(fields) # for map()
# Coefficient of Variation
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/FORTY/REGEN_V1.1_LongTermStns_1950-2013_1deg_CoefofVariation.nc")
CV.Avg <- var.get.nc(nc, "sd")
lon <- var.get.nc(nc, "lon")
lat <- var.get.nc(nc, "lat")
close.nc(nc)
quantile(CV.Avg, probs = seq(0, 1, 0.05), na.rm = T)
brks <- seq(0.25, 1.5, length.out = 6)
zbrks <- brks
zbrks[1] <- min(brks, CV.Avg, na.rm = T)
zbrks[length(brks)] <- max(brks, CV.Avg, na.rm = T)
lbrks <- ifelse(brks == zbrks, brks, "")
lbrks <- ifelse(brks == zbrks, lbrks, "")
cols <- c('#f1eef6','#d7b5d8','#df65b0','#dd1c77','#980043')
par(mar = c(0,0,0,0) + 0.1, bg = NA)
layout(mat = matrix(c(1,2), nrow = 2, byrow = T), heights = c(1,0.15))
image(lon, lat, CV.Avg, xlab = "", ylab = "", breaks = zbrks,
col = cols, xaxt = "n", yaxt = "n", cex.lab = 1.5, cex.axis = 1.5, ylim = c(-60,90))
map("world", interior = F, add = T)
par(mar = c(0,8,0,8) + 0.1)
colkey(col = cols, side = 1, clim = range(brks), width = 5, at = lbrks,
labels = paste(lbrks), cex.axis = 1.5)
title("REGEN V1.1 Long Term 40yr Coefficient of variation (Std dev / Ann Clim) 1950-2013", line = -1.5, cex.main = 1.5)
Standard Deviation
The above figure will be replaced by a coefficient of variation map
# Let's first import the netcdf for one year
library(RNetCDF)
library(lubridate)
# Create the mask
# First import the LSmask
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.0/ALL/CCRC_V1.0_1950-2013_1deg_LandSeamask.nc")
LSmask <- var.get.nc(nc, "p")
close.nc(nc)
SDmask <- !LSmask
# Mask based on Standard Dev
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/ALL/REGEN_V1.1_ALLStns_1950-2013_1deg_timavg.nc")
ann.clim <- var.get.nc(nc, "p")
avg.sd <- var.get.nc(nc,"sd")
avg.cv <- avg.sd/ann.clim
avg.ek <- var.get.nc(nc, "ek")
avg.s <- var.get.nc(nc, "s")
lon <- var.get.nc(nc, "lon")
lat <- var.get.nc(nc, "lat")
close.nc(nc)
# select threshold 1 based on 95pctl of cv
thresh1 <- quantile(avg.cv, probs = 0.95, na.rm = T)
SDmask[which(avg.cv <= thresh1)] <- T
# select threshold 2 based on 95pctl of ek
EKmask <- !LSmask
thresh2 <- quantile(avg.ek[which(avg.s >= 1)], probs = 0.95, na.rm = T)
EKmask[which(avg.ek <= thresh2 & avg.ek > 0)] <- T
# Now bring back the grids that have atleast 60% days each decade with 1 or more stations
# Import netcdf of total stations for each decade
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/ALL/REGEN_V1.1_ALLStns_1950-2013_1deg_totalDaysWithAtleast1StnPerDecade.nc")
record_length_per_decade <- var.get.nc(nc, "s")
close.nc(nc)
# Mask record_length_per_decade
record_length_per_decade[is.na(LSmask)] <- NA
# How many days in each decade
dates <- seq(ymd("19500101"), ymd("20131231"),1)
ydates <- year(dates)
days_per_decade <- sapply(seq(1960, 2020, 10), FUN = function(n) {length(which(ydates >= n - 10 & ydates < n))})
thresh <- 0.6
stationMask <- array(dim = c(dim(LSmask), 7), data <- !LSmask)
for (i in 1:7) {
stationMask[,,i][which(record_length_per_decade[,,i] >= thresh * days_per_decade[i])] <- T
}
stationMask <- stationMask[,,1] & stationMask[,,2] & stationMask[,,3] & stationMask[,,4] & stationMask[,,5] &
stationMask[,,6] & stationMask[,,7]
EKmask[stationMask] <- T
# Combine SDmask and EKmask
QualityMask <- !LSmask
QualityMask[SDmask & EKmask] <- T
# We will use RNetCDF to write to ncdf instead
library(RNetCDF)
nc <- create.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/ALL/REGEN_V1.1_ALLStns_1950-2013_1deg_QualityMask.nc")
# two dimensions
dim.def.nc(nc, "lon", 360)
dim.def.nc(nc, "lat", 180)
# three variables including two coordinate vars
var.def.nc(nc, "lon", "NC_DOUBLE", "lon")
var.def.nc(nc, "lat", "NC_DOUBLE", "lat")
var.def.nc(nc, "p", "NC_INT", c(0, 1))
# attributes
att.put.nc(nc, "lon", "unit", "NC_CHAR", "degrees_east")
att.put.nc(nc, "lat", "unit", "NC_CHAR", "degrees_north")
att.put.nc(nc, "lon", "long_name", "NC_CHAR", "Longitude")
att.put.nc(nc, "lat", "long_name", "NC_CHAR", "Latitude")
att.put.nc(nc, "p", "unit", "NC_CHAR", "boolean")
att.put.nc(nc, "p", "long_name", "NC_CHAR", "boolean mask for REGEN precipitation V1.0")
att.put.nc(nc, "p", "missing_value", "NC_INT", -99999)
att.put.nc(nc, "NC_GLOBAL", "title", "NC_CHAR", "REGEN V1.1 1deg Quality Mask based on Std Dev and Krig Err 1950-2013")
att.put.nc(nc, "NC_GLOBAL", "history", "NC_CHAR", paste("Created on", Sys.Date()))
# put vars
var.put.nc(nc, "lon", lon)
var.put.nc(nc, "lat", lat)
var.put.nc(nc, "p", QualityMask)
close.nc(nc)
##############
# Plot the masks
library(maps)
par(mar = c(3,1,3,1) + 0.1, bg = NA)
image(lon, lat, EKmask, ylim = c(-60,90), xaxt = "n", yaxt = "n", xlab = "", ylab = "",
col = rev(c("#d95f02","#f1eef6")), cex.main = 2)# , main = "REGEN data quality mask") #
stationMask[!stationMask] <- NA
image(lon, lat, stationMask, ylim = c(-60,90), xaxt = "n", yaxt = "n", xlab = "", ylab = "",
col = '#d95f02', add = T)
SDmask[SDmask] <- NA
image(lon, lat, SDmask, ylim = c(-60,90), xaxt = "n", yaxt = "n", xlab = "", ylab = "",
col = '#bebada', add = T)
map("world", interior = F, add = T)
legend("bottom", horiz = T, bty = "n", col = c("#f1eef6", '#bebada'), pch = 19, cex = 1.8,
legend = c("Masked based on Kriging Error", "Masked based on Std Dev"),
xpd = T, inset = c(0, -0.1), pt.cex = 4)
# Do the same for long term 40yr data
# Let's first import the netcdf for one year
library(RNetCDF)
library(lubridate)
# Create the mask
# First import the LSmask
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.0/ALL/CCRC_V1.0_1950-2013_1deg_LandSeamask.nc")
LSmask <- var.get.nc(nc, "p")
close.nc(nc)
SDmask <- !LSmask
# Mask based on Standard Dev
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/FORTY/REGEN_V1.1_LongTermStns_1950-2013_1deg_timavg.nc")
ann.clim <- var.get.nc(nc, "p")
avg.sd <- var.get.nc(nc,"sd")
avg.cv <- avg.sd/ann.clim
avg.ek <- var.get.nc(nc, "ek")
avg.s <- var.get.nc(nc, "s")
lon <- var.get.nc(nc, "lon")
lat <- var.get.nc(nc, "lat")
close.nc(nc)
# select threshold 1 based on 95pctl of cv
thresh1 <- quantile(avg.cv, probs = 0.95, na.rm = T)
SDmask[which(avg.cv <= thresh1)] <- T
# select threshold 2 based on 95pctl of ek
EKmask <- !LSmask
thresh2 <- quantile(avg.ek[which(avg.s >= 1)], probs = 0.95, na.rm = T)
EKmask[which(avg.ek <= thresh2 & avg.ek > 0)] <- T
# Now bring back the grids that have atleast 60% days each decade with 1 or more stations
# Import netcdf of total stations for each decade
nc <- open.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/FORTY/REGEN_V1.1_LongTermStns_1950-2013_1deg_totalDaysWithAtleast1StnPerDecade.nc")
record_length_per_decade <- var.get.nc(nc, "s")
close.nc(nc)
# Mask record_length_per_decade
record_length_per_decade[is.na(LSmask)] <- NA
# How many days in each decade
dates <- seq(ymd("19500101"), ymd("20131231"),1)
ydates <- year(dates)
days_per_decade <- sapply(seq(1960, 2020, 10), FUN = function(n) {length(which(ydates >= n - 10 & ydates < n))})
thresh <- 0.6
stationMask <- array(dim = c(dim(LSmask), 7), data <- !LSmask)
for (i in 1:7) {
stationMask[,,i][which(record_length_per_decade[,,i] >= thresh * days_per_decade[i])] <- T
}
stationMask <- stationMask[,,1] & stationMask[,,2] & stationMask[,,3] & stationMask[,,4] & stationMask[,,5] &
stationMask[,,6] & stationMask[,,7]
EKmask[stationMask] <- T
# Combine SDmask and EKmask
QualityMask <- !LSmask
QualityMask[SDmask & EKmask] <- T
# We will use RNetCDF to write to ncdf instead
library(RNetCDF)
nc <- create.nc("/srv/ccrc/data11/z3289452/DailyPRCP_netCDFs/CCRCGlobalGriddedDailyPrecip/VERSION1.1/FORTY/REGEN_V1.1_LongTermStns_1950-2013_1deg_QualityMask.nc")
# two dimensions
dim.def.nc(nc, "lon", 360)
dim.def.nc(nc, "lat", 180)
# three variables including two coordinate vars
var.def.nc(nc, "lon", "NC_DOUBLE", "lon")
var.def.nc(nc, "lat", "NC_DOUBLE", "lat")
var.def.nc(nc, "p", "NC_INT", c(0, 1))
# attributes
att.put.nc(nc, "lon", "unit", "NC_CHAR", "degrees_east")
att.put.nc(nc, "lat", "unit", "NC_CHAR", "degrees_north")
att.put.nc(nc, "lon", "long_name", "NC_CHAR", "Longitude")
att.put.nc(nc, "lat", "long_name", "NC_CHAR", "Latitude")
att.put.nc(nc, "p", "unit", "NC_CHAR", "boolean")
att.put.nc(nc, "p", "long_name", "NC_CHAR", "boolean mask for REGEN precipitation V1.0")
att.put.nc(nc, "p", "missing_value", "NC_INT", -99999)
att.put.nc(nc, "NC_GLOBAL", "title", "NC_CHAR", "REGEN V1.0 1deg Quality Mask based on Std Dev and Krig Err 1950-2013 for the long term data, i.e. data based on stations that have at least 40yrs of data in 1950-2013")
att.put.nc(nc, "NC_GLOBAL", "history", "NC_CHAR", paste("Created on", Sys.Date()))
# put vars
var.put.nc(nc, "lon", lon)
var.put.nc(nc, "lat", lat)
var.put.nc(nc, "p", QualityMask)
close.nc(nc)
##############
library(maps)
# Plot the masks
par(mar = c(3,1,3,1) + 0.1, bg = NA)
image(lon, lat, EKmask, ylim = c(-60,90), xaxt = "n", yaxt = "n", xlab = "", ylab = "",
col = rev(c("#d95f02","#f1eef6")), cex.main = 2)# , main = "REGEN Long Term 40yr data quality mask") #
stationMask[!stationMask] <- NA
image(lon, lat, stationMask, ylim = c(-60,90), xaxt = "n", yaxt = "n", xlab = "", ylab = "",
col = '#d95f02', add = T)
SDmask[SDmask] <- NA
image(lon, lat, SDmask, ylim = c(-60,90), xaxt = "n", yaxt = "n", xlab = "", ylab = "",
col = '#bebada', add = T)
map("world", interior = F, add = T)
legend("bottom", horiz = T, bty = "n", col = c("#f1eef6", '#bebada'), pch = 19, cex = 1.8,
legend = c("Masked based on Kriging Error", "Masked based on Std Dev"),
xpd = T, inset = c(0, -0.1), pt.cex = 4)