## Loading required package: sp
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/proj
## Loading required package: terra
## terra version 1.2.10
## 
## Attaching package: 'terra'
## The following object is masked from 'package:rgdal':
## 
##     project
## Loading required package: latticeExtra
## 
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
## 
##     layer
## Checking rgeos availability: TRUE
soda <- nc_open("SODA_train.nc")
label <- nc_open("SODA_label.nc")
attributes(soda)$names
##  [1] "filename"    "writable"    "id"          "safemode"    "format"     
##  [6] "is_GMT"      "groups"      "fqgn2Rindex" "ndims"       "natts"      
## [11] "dim"         "unlimdimid"  "nvars"       "var"
print(paste("The file has",soda$nvars,"variables,", soda$ndims,"dimensions."))
## [1] "The file has 4 variables, 4 dimensions."
soda
## File SODA_train.nc (NC_FORMAT_NETCDF4):
## 
##      4 variables (excluding dimension variables):
##         float sst[lon,lat,month,year]   (Contiguous storage)  
##             _FillValue: NaN
##         float t300[lon,lat,month,year]   (Contiguous storage)  
##             _FillValue: NaN
##         double ua[lon,lat,month,year]   (Contiguous storage)  
##             _FillValue: NaN
##         double va[lon,lat,month,year]   (Contiguous storage)  
##             _FillValue: NaN
## 
##      4 dimensions:
##         year  Size:100
##         month  Size:36
##         lat  Size:24
##             _FillValue: NaN
##         lon  Size:72
##             _FillValue: NaN
label
## File SODA_label.nc (NC_FORMAT_NETCDF4):
## 
##      1 variables (excluding dimension variables):
##         double nino[month,year]   (Contiguous storage)  
##             _FillValue: NaN
## 
##      2 dimensions:
##         year  Size:100
##         month  Size:36

Variables Extractiom (First slice)

The original data set involves multi dimentions and multi variables,thus I get the first slice for following analysis.

lon <- ncvar_get(soda,"lon")
lat <- ncvar_get(soda,"lat")
sst <- ncvar_get(soda,"sst")
t300 <- ncvar_get(soda,"t300")
ua <- ncvar_get(soda,"ua")
va <- ncvar_get(soda,"va")

nino <- ncvar_get(label,"nino")


sst_month <- sst[1,1,,1]
sst_year <- sst[1,1,1,]

t300_month <- t300[1,1,,1]
t300_year <- sst[1,1,1,]

ua_month <- ua[1,1,,1]
ua_year <- ua[1,1,1,]

va_month <- va[1,1,,1]
va_year <- va[1,1,1,]


nino_month <- nino[,1]
nino_year <- nino[1,]


sst_1 <- sst[,,,1]
month <- ncvar_get(soda,"month")

dim(sst)
## [1]  72  24  36 100
dim(sst[0,0,0,])
## [1]   0   0   0 100
sst_slice <- sst[,,1,1]
dim(sst_slice)
## [1] 72 24
nino <- ncvar_get(label,"nino")
nino_month <- nino[,1]
nino_year <- nino[1,]
sst_month <- sst[1,1,,1]
sst_year <- sst[1,1,1,]
library(ggfortify)
df_month <- data.frame(lon,lat,sst_month,t300_month,ua_month,va_month,nino_month)
time_series <- ts(df_month,frequency = 12,start = c(2010,7))
#plot(time_series[,3:7],type="l",xlab="time",ylab="")
autoplot(time_series[,3:7]) +
  ggtitle("Time Series Plot of the `EuStockMarkets' Time-Series") +
  theme(plot.title = element_text(hjust = 0.5))

High dimentional data visualizing

library(tabplot)
## Loading required package: bit
## 
## Attaching package: 'bit'
## The following object is masked from 'package:base':
## 
##     xor
## Loading required package: ff
## Attaching package ff
## - getOption("fftempdir")=="/var/folders/tj/qdp8gjxs4rq6yfb50p60w3180000gn/T//RtmpfqfJeL/ff"
## - getOption("ffextension")=="ff"
## - getOption("ffdrop")==TRUE
## - getOption("fffinonexit")==TRUE
## - getOption("ffpagesize")==65536
## - getOption("ffcaching")=="mmnoflush"  -- consider "ffeachflush" if your system stalls on large writes
## - getOption("ffbatchbytes")==16777216 -- consider a different value for tuning your system
## - getOption("ffmaxbytes")==536870912 -- consider a different value for tuning your system
## 
## Attaching package: 'ff'
## The following objects are masked from 'package:terra':
## 
##     is.factor, ncol<-, nrow<-
## The following objects are masked from 'package:raster':
## 
##     filename, is.factor, ncol<-, nrow<-
## The following objects are masked from 'package:utils':
## 
##     write.csv, write.csv2
## The following objects are masked from 'package:base':
## 
##     is.factor, is.ordered
## Loading required package: ffbase
## Registered S3 methods overwritten by 'ffbase':
##   method   from
##   [.ff     ff  
##   [.ffdf   ff  
##   [<-.ff   ff  
##   [<-.ffdf ff
## 
## Attaching package: 'ffbase'
## The following object is masked from 'package:terra':
## 
##     %in%
## The following object is masked from 'package:raster':
## 
##     %in%
## The following objects are masked from 'package:base':
## 
##     %in%, table
tableplot(df_month, sortCol="lat", scales="lin",nBins = 100, fontsize = 6)
## Warning in tableplot_checkBins(nBins, max(N, 2)): Setting nBins (100) to number
## of rows (72)

library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:terra':
## 
##     wrap
ggparcoord(data = df_month,
  scale = "uniminmax", alphaLines = 0.1) + 
  ylab("PCP of Global Fire Data") +
  theme_bw() +
  theme(axis.text.x  = element_text(angle=315, vjust=1.0, hjust=0.0, size=14),
        axis.title.x = element_blank(), 
        axis.text.y = element_blank(), axis.ticks.y = element_blank() )

Time Series Plotting

library(mvtsplot)
heatmap(as.matrix(df_month[3:7]))

mvtsplot(df_month[, 3:7], smooth.df = 25, levels=9, margin=TRUE)

lonlat <- as.matrix(expand.grid(lon,lat))
#dim(lonlat)
sst_vec <- as.vector(sst_month)
length(sst_vec)
## [1] 36
sst_df <- data.frame(cbind(lonlat,sst_vec))
names(sst_df) <- c("lon","lat",paste("sst",as.character(1),sep = "_"))
head(na.omit(sst_df))
##   lon lat     sst_1
## 1   0 -55 1.8618509
## 2   5 -55 1.2667823
## 3  10 -55 0.8163485
## 4  15 -55 0.5574367
## 5  20 -55 0.3394764
## 6  25 -55 0.2006739

SpatialTemporal Plotting

sodar <- raster("SODA_train.nc")
## Warning in .varName(nc, varname, warn = warn): varname used is: sst
## If that is not correct, you can set it to one of: sst, t300, ua, va
## Warning in .rasterObjectFromCDF(x, type = objecttype, band = band, ...): "level"
## set to 1 (there are 36 levels)
levelplot(sodar)

# plot with outlines, a better color scale, no marginal plots
#mapTheme <- rasterTheme(region=brewer.pal(8,"Blues"))
#plt <- levelplot(sodar, margin=F, par.settings=mapTheme)
#plt 
#+ latticeExtra::layer(sp.lines(world_outline, col="gray30", lwd=0.5))


mapTheme <- rasterTheme(region = rev(brewer.pal(10, "RdBu")))
cutpts <- c(-2.5, -2.0, -1.5, -1, -0.5, 0, 0.5, 1.0, 1.5, 2.0, 2.5)
plt <- levelplot(sodar, margin = F, at=cutpts, cuts=11, pretty=TRUE, par.settings = mapTheme,
  main="test variable -- as raster layer")
plt 

#+ layer(sp.lines(world_outline, col="black", lwd=1.0))
sodar
## class      : RasterLayer 
## band       : 1  (of  100  bands)
## dimensions : 24, 72, 1728  (nrow, ncol, ncell)
## resolution : 5, 5  (x, y)
## extent     : -2.5, 357.5, -57.5, 62.5  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : SODA_train.nc 
## names      : sst 
## z-value    : 1 
## zvar       : sst 
## level      : 1

Bayesian Model

coords <- as.matrix(c(lon,lat))
p <- 5

beta.prior.mean <- as.matrix(rep(0,times=p))
beta.prior.precision <- matrix(0, nrow=p, ncol=p)
phi <-0.014
alpha <- 0.016/0.08
sigma.sq.prior.precision <- matrix(0,nrow = p,ncol = p)
sigma.sq.prior.shape <- 2.0
sigma.sq.prior.rate <- 0.08
#sp.exact <- bayesGeostatExact( 
#            nino_month ~ sst_month + t300_month + ua_month + va_month,
#            data=df_month, coords=coords, n.samples=1000, 
#            beta.prior.mean=beta.prior.mean, 
#            beta.prior.precision=beta.prior.precision,
#            cov.model="gaussian",
#            phi=phi,
#            alpha=alpha,
#            sigma.sq.prior.shape=sigma.sq.prior.shape,
#            sigma.sq.prior.rate=sigma.sq.prior.rate, 
#            sp.effects=FALSE)
#sp <- spLM(nino_month ~ sst_month + t300_month + ua_month + va_month,
#           data=df_month, coords=coords,
#           starting = list("phi"=3/200,"sigma.sq"=0.08,"tau.sq"=0.02),
#           tuning = list("phi"=0.1,"sigma.sq"=0.05,"tau.sq"=0.05),
#           priors = list("phi.Unif"=c(3/1500,3/50),"sigma.sq.IG"=c(2,0.08),
#                         "tau.sq.IG"=c(2,0.02)),
#           cov.model = "exponential",n.samples = 1000)