# Big Data Exercise 08: Spatial Smoothing at Scale
# Yanxin Li
# Nov 17 2017

rm(list=ls())
setwd("D:/2017 UT Austin/Statistical Models for Big Data/R")
options(warn = -1)
# Kernel smoothing
library(fields)
# Read in data
co2 <- read.csv("co2.csv", header = TRUE)
summary(co2)
##       day              lon               lat            co2avgret    
##  Min.   : 1.000   Min.   :-180.00   Min.   :-60.000   Min.   :347.8  
##  1st Qu.: 4.000   1st Qu.:-100.22   1st Qu.:-25.120   1st Qu.:373.1  
##  Median : 8.000   Median : -18.57   Median : -4.390   Median :375.1  
##  Mean   : 7.971   Mean   : -13.39   Mean   :  1.924   Mean   :375.5  
##  3rd Qu.:12.000   3rd Qu.:  62.47   3rd Qu.: 27.380   3rd Qu.:377.7  
##  Max.   :15.000   Max.   : 180.00   Max.   : 89.140   Max.   :403.7
# A quick look at the observations, plotted over a world map
# png("co2.png")
quilt.plot(co2[,2:3], co2[,4])
world(add=TRUE) 

# dev.off()
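# Not part of the original script: a minimal kernel-smoothing sketch on the CO2
# observations using fields::smooth.2d, which bins the irregular points onto a
# regular grid and convolves them with a Gaussian kernel.  The bandwidth
# theta = 5 (degrees) and the 128 x 64 grid are assumed values chosen only for
# illustration.
co2.ks <- smooth.2d(co2$co2avgret, x = co2[, c("lon", "lat")],
                    theta = 5, nrow = 128, ncol = 64)
image.plot(co2.ks)
world(add = TRUE)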

# Plot the 2-D (bivariate standard normal) Gaussian kernel as a surface
library(rgl)

x <- seq(-3, 3, length=100)
y <- seq(-3, 3, length=100)
z <- outer(x,y, function(x,y) 0.5/pi*exp(-0.5*(x^2+y^2)))
persp3d(x, y, z, col = rainbow(100))
# Laplacian smoothing
library(Matrix)
library(gplots)
# Construct the oriented edge (incidence) matrix D for a 2-d grid of size dim1 by dim2.
# This function was provided by Prof. James Scott.
makeD2_sparse <- function (dim1, dim2)  {
  require(Matrix)
  D1 <- bandSparse(dim1 * dim2, m = dim1 * dim2, k = c(0, 1), 
                  diagonals = list(rep(-1, dim1 * dim2), 
                                   rep(1, dim1 * dim2 - 1)))
  D1 <- D1[(seq(1, dim1 * dim2)%%dim1) != 0, ]
  D2 <- bandSparse(dim1 * dim2 - dim1, m = dim1 * dim2, 
                  k = c(0, dim1), diagonals = list(rep(-1, dim1 * dim2), 
                                                   rep(1, dim1 * dim2 - 1)))
  return(rbind(D1, D2))
}
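# Quick sanity check, not in the original script: on a 3-by-4 grid there are
# 3*4 = 12 nodes and 3*(4-1) + 4*(3-1) = 17 edges, so D should be 17 x 12,
# and crossprod(D) = t(D) %*% D is the graph Laplacian of the grid.
D.small <- makeD2_sparse(3, 4)
dim(D.small)                       # expect 17 12
range(diag(crossprod(D.small)))    # node degrees: 2 (corners) up to 4 (interior)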
# Direct solver for Ax = b.
direct.sparse.solver <- function(A, b){
  # Sparse direct solve via Matrix::solve.
  x <- solve(A, b, sparse = TRUE)
  return(x)
}
# Cholesky decomposition
chol.solver <- function (A,b){
  
  U <- chol(A)  # Upper-triangular Cholesky factor U of A, so that U'U = A.
  decomp <- as(U, "sparseMatrix")
  
  # Replace Ax=b with U'Ux=b, solve U'z=b for z.
  z <- forwardsolve(t(decomp), b)
  # Solve Ux = z for x.
  x <- backsolve(decomp, z)
  
  return(x)
}
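# Small check, not in the original script: the Cholesky solver should agree
# with a direct sparse solve on a toy symmetric positive-definite system.
# A.test and b.test are made up purely for this check.
A.test <- bandSparse(5, k = c(0, 1),
                     diagonals = list(rep(4, 5), rep(-1, 4)), symmetric = TRUE)
b.test <- rnorm(5)
max(abs(chol.solver(A.test, b.test) - as.vector(solve(A.test, b.test))))  # ~ 0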
# Jacobi iterative solver for Ax=b.
jacobi.solver <- function(A,b,x0 = rep(1,length(b)),maxiter=100,tol=1E-14){
  # A: input matrix
  # b: input vector
  # x0: initial guess for the solution vector x
  # maxiter: maximum number of iterations
  # tol: convergence tolerance on the largest absolute change in x
  
  # Initialize values.
  xold <- x0
  Dinv <- 1/diag(A) # Reciprocal of the diagonal of A.
  R <- A        # Remainder; A with diagonal set to 0.
  diag(R) <- 0
  
  for (i in 1:maxiter){
    
    # Jacobi update: x_new = D^{-1} (b - R x_old).
    xnew <- Dinv * (b - R %*% xold)
    
    # Check convergence.
    if(max(abs(xnew-xold)) < tol) { break; }
    
    # Update xold.
    xold <- xnew
  }
  
  return(xnew)
}
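# Small check, not in the original script: Jacobi iteration converges when A is
# strictly diagonally dominant, so on a toy tridiagonal system it should match
# the direct solution.  A.dd and b.dd are made up purely for this check.
A.dd <- bandSparse(6, k = c(0, 1),
                   diagonals = list(rep(3, 6), rep(-1, 5)), symmetric = TRUE)
b.dd <- rnorm(6)
max(abs(as.vector(jacobi.solver(A.dd, b.dd)) - as.vector(solve(A.dd, b.dd))))  # ~ 0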
# Read in the fMRI data.
data <- as.matrix(read.csv("fmri_z.csv", header = T))
data.mat <- Matrix(data)

# Heat map of the noisy fMRI data.
image(t(data.mat), sub='', xlab='',ylab='',cuts=80)

# Represent the signal values as a vector (column-major, matching the grid ordering used by makeD2_sparse).
y <- as.vector(data.mat)

# Oriented edge matrix.
D <- makeD2_sparse(128,128)

# Laplacian matrix.
L <- crossprod(D)

# Lambda penalty.
lambda <- 10

# Laplacian smoothing minimizes (1/2)*||y - x||^2 + (lambda/2)*t(x) %*% L %*% x,
# whose first-order condition is (I + lambda*L) x = y.  Set up A = I + lambda*L.
A <- lambda*L
diag(A) <- diag(A) + 1
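# Not in the original script: quick structural checks on A.  A should be
# symmetric (so the Cholesky solver applies) and strictly diagonally dominant
# with positive diagonal (so it is positive definite and Jacobi converges):
# each diagonal entry is 1 + lambda*deg(i) versus an off-diagonal row sum of
# lambda*deg(i).
isSymmetric(A)                                    # expect TRUE
min(diag(A) - (rowSums(abs(A)) - abs(diag(A))))   # expect 1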

# DIRECT SOLVER
xD <- direct.sparse.solver(A,y)
xD.grid <- Matrix(as.vector(xD),128,128)    # Reconstruct de-noised grid (column-major fill, matching as.vector(data.mat)).
xD.grid[which(data.mat==0)] <- 0    # Set values which were 0 in the original grid back to 0.

# CHOLESKY SOLVER
xC <- chol.solver(A,y)
xC.grid <- Matrix(as.vector(xC),128,128)    # Reconstruct de-noised grid (column-major fill).
xC.grid[which(data.mat==0)] <- 0    # Set values which were 0 in the original grid back to 0.

# JACOBI SOLVER
xJ <- jacobi.solver(A,y,maxiter=1000)
xJ.grid <- Matrix(as.vector(xJ),128,128)    # Reconstruct de-noised grid (column-major fill).
xJ.grid[which(data.mat==0)] <- 0 # Set values which were 0 in the original grid back to 0.

# Collect the direct, Jacobi, and Cholesky solutions side by side for comparison.
DJC <- cbind(xD, xJ, xC)
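# Not in the original script: how close are the three solutions?  The Jacobi
# answer should be close to the direct and Cholesky answers, with any gap
# attributable to stopping the Jacobi iterations after a finite number of steps.
max(abs(DJC[, 1] - DJC[, 2]))   # direct vs Jacobi
max(abs(DJC[, 1] - DJC[, 3]))   # direct vs Cholesky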

# Side-by-side heat maps of the noisy and de-noised data.
# jpeg('lp.smoothing.noisey.jpg')
image(t(data.mat), sub='', xlab='',ylab='',cuts=80)

# dev.off()
# jpeg('lp.smoothing.jacobismoothed.jpg')
image(t(xJ.grid), sub='', xlab='',ylab='',cuts=80)

# dev.off()
# jpeg('lp.smoothing.directsmoothed.jpg')
image(t(xD.grid), sub='', xlab='',ylab='',cuts=80)

# dev.off()
# Cholesky-smoothed data.
image(t(xC.grid), sub='', xlab='',ylab='',cuts=80)