######### HOMEWORK 1, STOR 665: COMPUTING PART, DUE FEB 8, 2016 #########
######### Student Name: Jie Huang
# INSTRUCTION: Edit this R file by adding your solution to the end of each
# question. Your submitted file should run smoothly in R and provide all
# required results. You are allowed to work with other students but the
# homework should be in your own words. Identical solutions will receive a 0
# in grade and will be investigated.
# This homework is an exercise in Least Squares computation. We will
# build up R code that
# 1) computes a Q-R decomposition of a matrix X,
# 2) solves equations when the coefficients are upper triangular,
# 3) inverts an upper triangular matrix R, and finally
# 4) computes and returns most things we find useful in regression.
# We will be negligent in the sense that we will ignore precautions
# that a numerical analyst would build into his code, and we will not
# consider measures to improve numerical conditioning such as pivoting
# (switching the order of the variables) either.
# BACKGROUND: Most LS solvers are based on decompositions of the
# design matrix of the form X = Q R, where R is square and triangular
# and Q has orthonormal columns. The idea is that the LS criterion
# can be written as follows:
# | y - X b |^2 = | Q^T y - Q^T X b |^2 + | r |^2
# = | Q^T y - R b |^2 + | r |^2
# where the first term can be forced to zero by solving
# Q^T y = R b
# This equation is easily solved when R is triangular.
# Furthermore, for inference we want the matrix (X^T X)^{-1},
# but with a Q-R decomposition this is easily gotten by
# inverting R because
# (X^T X)^{-1} = R^{-1} R^{-1}^T
#----------------------------------------------------------------
# PROBLEM 1: Write a function 'gs(X)' to implement the so-called
# modified Gram-Schmidt procedure for orthnormalizing an nxp matrix.
# This procedure consists of orthnormalizing the columns of X to form
# the matrix Q of the same size, and storing the coefficients to
# reconstitute X from Q in an upper triangular matrix R.
# Here is the algorithm:
rm(list=ls(all=TRUE))
gs <- function(X)
{
p <- dim(X)[2]
R = diag(p) #initialize R to be a p*p identity matrix
Q <- X
# Loop over the columns of Q and do the following:
# At stage j,
# normalize Q[,j] in place
# adjust the columns of Q[,(j+1):p] for Q[,j]
# store the original length of Q[,j] in R[j,j]
# and the coefficients from adjustment properly in row R[j,].
# In this algorithm, use loops and sum() to calculate inner products
# and sqrt() for lengths, but not lm() or any other high-level function.
for (j in seq(1,p-1))
{
for (k in seq( (j+1),p))
{
R[j,k]= sum(X[,k]*Q[,j])/sum(Q[,j]*Q[,j]) #use sum(a*b) here as t(a)%*%b
#alternative way, using a selfdefined innerprod function
# innerprod <- function(a,b)
# {return(sum(a*b))}
#R[j,k]= innerprod(X[,k],Q[,j])/innerprod(Q[,j],Q[,j])
#R[j,k]= (t(X[,k])%*%Q[,j])/(t(Q[,j]) %*% Q[,j])
Q[,k] = Q[,k] - R[j,k] * Q[,j]
}
}
#normalize Q and R
for (j in seq(1,p))
{
#norm = sqrt(innerprod(Q[,j],Q[,j]))
norm = sqrt(sum(Q[,j]^2))
R[j,] = R[j,] * norm
Q[,j] = Q[,j]/norm
}
# Finish the function gs() with return(list(Q=Q, R=R))
returned=list(Q=Q,R=R)
return(returned)
}
# Test your function on these data:
# If you do this right, it should hold that X = Q R.
X <- cbind(rep(1,10), 1:10, (1:10)^2)
sol <- gs(X)
# Show these and comment on them:
sol$Q %*% sol$R
## [,1] [,2] [,3]
## [1,] 1 1 1
## [2,] 1 2 4
## [3,] 1 3 9
## [4,] 1 4 16
## [5,] 1 5 25
## [6,] 1 6 36
## [7,] 1 7 49
## [8,] 1 8 64
## [9,] 1 9 81
## [10,] 1 10 100
round(t(sol$Q) %*% sol$Q, 3)
## [,1] [,2] [,3]
## [1,] 1 0 0
## [2,] 0 1 0
## [3,] 0 0 1
#----------------------------------------------------------------
# PROBLEM 2: Write function 'tri.solve(R,z)' that accepts an upper
# triangular matrix R and a colum z and returns the solution b of z =
# R b. Show the results for these inputs:
# z <- 10:1
# R <- (row(diag(10)) <= col(diag(10)))
# You may use loops and the sum() function inside tri.solve() to spare
# yourself an inner loop, but you must not use canned solvers such as
# solve() or ginv() (the latter from the MASS package).
# SOLUTION:
tri.solve <- function(R,z)
{
p = length(z)
b = rep(0,p)
for (j in p:1)
{
b[j] = z[j]/R[j,j]
for (k in 1:(p-1)) z[k] = z[k] - R[k,j]*b[j]
}
return(b)
}
z <- 10:1
R <- (row(diag(10)) <= col(diag(10)))
tri.solve(R,z)
## [1] 1 1 1 1 1 1 1 1 1 1
#----------------------------------------------------------------
# PROBLEM 3: No programming, just thinking.
# Assume R and R1 are upper triangular square matrices.
# Questions:
# a) If the vector b is has non-zero values only in the first k entries,
# what can one say about z=Rb?
# b) When is R invertible?
# c) If the vector z has non-zero values only in the first k entries,
# and if R is invertible, what can one say about the vector b?
# d) What can one say about R1%*%R?
# e) What can one say about R^{-1}?
# SOLUTION:
# a) z is a linear combination of the first k columns of R, i.e., z lives in
# a k-dimensional subspace of the original p-dimensional space.
#
# b) none of the diagnoral items are 0.
#
# c) the bottom right submatrix of R with (p-k)*(p-k) dimension (give a name to this matrix-- R'), multiples the last (p-k) elements of b (give a name b'), is 0. i.e. R'b'=0. i.e. b' lives in the left null space of R'
#
# d) It is still upper triangular.
#
# e) Still upper triangular.
#----------------------------------------------------------------
# PROBLEM 4: For inference about the regression coefficients, we need
# the matrix (X^T X)^{-1}. In preparation for it, we need the
# inversion of an upper triangular matrix R because from a Q-R
# decomposition X=QR we can easily (X^T X)^{-1} if we have R^{-1}.
# Package the inversion in a function inv(R). Try it out on the R
# matrix of the above 'sol':
# inv(sol$R)
# inv(sol$R) %*% sol$R
# Show both and comment on both.
# SOLUTION:
inv <- function(R)
{
p = dim(R)[1]
z = diag(p)
Rinv = matrix(0,p,p)
for (j in 1:p)
{ Rinv[,j] = tri.solve(R,z[,j])}
return(Rinv)
}
#
# inv <- function(R)
# {
# p = dim(R)[1]
# gj <- data.frame(R, diag(p))
#
# for (j in p:2)
# {
# for (k in (j-1):1)
# {
# gj[k,] = gj[k,] - gj[j,]*R[k,j]/R[j,j]
# }
# }
#
# for (j in 1:p) gj[j,] = gj[j,]/gj[j,j]
#
# Rinv <- data.matrix(gj[ ,(p+1):(p*2)])
# return(Rinv)
# }
inv(sol$R)
## [,1] [,2] [,3]
## [1,] 0.3162278 -0.6055301 0.95742711
## [2,] 0.0000000 0.1100964 -0.47871355
## [3,] 0.0000000 0.0000000 0.04351941
round(inv(sol$R) %*% sol$R,3)
## [,1] [,2] [,3]
## [1,] 1 0 0
## [2,] 0 1 0
## [3,] 0 0 1
#----------------------------------------------------------------
# PROBLEM 5: Using tri.solve(R,z), inv(R) and gs(X,y), write a
# function reg(X,y) that computes a list with the following named
# elements:
reg <- function(X,y){
# a) "Coeffs": a matrix with one row per regression coefficient and columns
# named "Coefficient", "Std.Err.Est", "t-Statistic", "P-Value"
X <- cbind(Icept=1,X)
n=dim(X)[1]
p=dim(X)[2]
sol <- gs(X) # gives sol$Q and sol$R
z = t(sol$Q) %*% y
beta_hat <- tri.solve(sol$R,z) #"Coefficient" estimate
Rinv <- inv(sol$R)
XTXinv <- Rinv %*% t(Rinv)
#beta_hat <- XTXinv %*% t(X) %*% y #"Coefficient" estimate, alternative method
RSS <- sum((y - X %*% beta_hat)^2)
sigma2 <- RSS/(n-p)
s <- sqrt(sigma2)
sd_beta_hat <- s * sqrt(diag(XTXinv)) # "Std.Err.Est"
tStats <- beta_hat/sd_beta_hat # "t-Statistic"
p_value <- 2*(1- pt(tStats, n-p)) # Pr(>|t|)
matrixA <- cbind(beta_hat, sd_beta_hat, tStats, p_value)
colnames(matrixA) <- c("Coefficient", "Std.Err.Est","t-Statistic", "P-Value")
# b) "Var": the coefficient variance matrix s^2*(X^T X)^{-1}
Var <- s^2 * XTXinv
# c) "RMSE": a number, s
RMSE <- sqrt(RSS/n)
# d) "R2": a number, R2
SSTO <- sum((y-mean(y))^2)
R_square <- (SSTO-RSS)/SSTO
# e) "Residuals": the vector of residuals
Residuals <- y - X %*% beta_hat
returned <- list(Coeffs = matrixA, Var=Var, RMSE=RMSE, R2=R_square, Residuals=Residuals)
return (returned)
}
# Assume that X does not have a column of 1's but an intercept is desired,
# hence first thing in the function do X <- cbind(Icept=1,X) .
# You may use matrix multiplication if convenient, but no high-level
# functions other than things like sqrt() and pt().
# Try your solution on the following data and show the full result:
X <- cbind(Pred1=1:10, Pred2=(1:10)^2)
y <- rep(1,10) + 2*X[,1] + 3*X[,2] + resid(lm(rep(0:1,5)~X))
reg(X,y)
## $Coeffs
## Coefficient Std.Err.Est t-Statistic P-Value
## [1,] 1 0.69215351 1.444766 1.917550e-01
## [2,] 2 0.28907249 6.918680 2.274824e-04
## [3,] 3 0.02561073 117.138380 8.713030e-13
##
## $Var
## [,1] [,2] [,3]
## [1,] 0.47907648 -0.181818182 0.0144300144
## [2,] -0.18181818 0.083562902 -0.0072150072
## [3,] 0.01443001 -0.007215007 0.0006559097
##
## $RMSE
## [1] 0.492366
##
## $R2
## [1] 0.9999771
##
## $Residuals
## [,1]
## [1,] -0.3636364
## [2,] 0.6060606
## [3,] -0.4242424
## [4,] 0.5454545
## [5,] -0.4848485
## [6,] 0.4848485
## [7,] -0.5454545
## [8,] 0.4242424
## [9,] -0.6060606
## [10,] 0.3636364
# Compare with results from the canned regression function
summary(lm(y~X))
##
## Call:
## lm(formula = y ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6061 -0.4697 0.0000 0.4697 0.6061
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.00000 0.69215 1.445 0.191755
## XPred1 2.00000 0.28907 6.919 0.000227 ***
## XPred2 3.00000 0.02561 117.138 8.71e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5885 on 7 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.528e+05 on 2 and 7 DF, p-value: < 2.2e-16
# No need to comment, but you need to get the exact numbers.
# Compare the function execution time. Your function should be much faster.
system.time(for (i in 1:100){reg(X,y)})
## user system elapsed
## 0.033 0.001 0.034
system.time(for (i in 1:100){lm(y~X)})
## user system elapsed
## 0.099 0.001 0.102
#----------------------------------------------------------------