This document examines constuction of a a set of functions which compute multiple regression using matrix functions.

The input consists several parameters,

The function returns the coefficients of the regression equation.

linearRegression = function (x,y) {
    n = dim(y)[1]
    assertthat::assert_that(dim(y)[1]==dim(x)[1])
    ones = rep(1,n)
    m = matrix(ones,ncol = 1)
    nc = dim(y)[2]
    m = cbind(m,x)
    colnames (m) = NULL
    m = as.matrix(m)
    #transpose
    tm = t(m)
    tm.m = tm %*% m
    # the solve function finds the inverse of the matrix
    tm.mSolved = solve (tm.m)
    y = as.matrix(y)
    tm.y = tm %*% y
    coef = tm.mSolved %*% tm.y
    colnames(coef) = c("Coefficients")
    coef
    
}
# calling example
# result = linearRegression(x,y)

We now verify this function with the diamonds dataset. It resides in the ggplot2 library, so that library must be loaded.

We will use carat and depth to predict price.

library(ggplot2)
head(str(diamonds))
Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   53940 obs. of  10 variables:
 $ carat  : num  0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
 $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
 $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
 $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
 $ depth  : num  61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
 $ table  : num  55 61 65 58 58 57 57 55 61 61 ...
 $ price  : int  326 326 327 334 335 336 336 337 337 338 ...
 $ x      : num  3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
 $ y      : num  3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
 $ z      : num  2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
NULL
# build separate x and y matrices 
price = diamonds$price
price = as.data.frame(price)
caratNdepth = diamonds$carat
caratNdepth = as.data.frame(caratNdepth)
caratNdepth$depth = diamonds$depth
coefficients = linearRegression(caratNdepth,price)
coefficients
     Coefficients
[1,]    4045.3332
[2,]    7765.1407
[3,]    -102.1653

Verify that the coefficients are the same by using the built-in R function lm().

lmodel = summary(lm(price$price ~ ., data= caratNdepth))
lmodel$coefficients
             Estimate Std. Error   t value      Pr(>|t|)
(Intercept) 4045.3332 286.205390  14.13437  2.809887e-45
caratNdepth 7765.1407  14.009367 554.28204  0.000000e+00
depth       -102.1653   4.635278 -22.04082 3.486190e-107

So our function agrees with the R function. We will not calculate the standard error and t values, nor the probability. However, we do have enoough information to caclulate r squared.

Your code goes here..

# from the lm function
print(lmodel$r.squared)
[1] 0.8506755

How would you calculate it?

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KVGhpcyBkb2N1bWVudCBleGFtaW5lcyBjb25zdHVjdGlvbiBvZiBhIGEgc2V0IG9mIGZ1bmN0aW9ucyB3aGljaCBjb21wdXRlIG11bHRpcGxlIHJlZ3Jlc3Npb24gdXNpbmcgbWF0cml4IGZ1bmN0aW9ucy4NCg0KVGhlIGlucHV0IGNvbnNpc3RzIHNldmVyYWwgcGFyYW1ldGVycywgDQoNCiAgKiB5IC0gdGhlIGRlcGVuZGVudCB2YXJpYWJsZSBhcyBhIHZlY3Rvcg0KICAqIHggLSBhIGRhdGEgZnJhbWUgY29uc2lzdGluZyBvZiB0aGUgaW5kZXBlbmRlbnQgdmFyaWFibGVzDQoNClRoZSBmdW5jdGlvbiByZXR1cm5zIHRoZSBjb2VmZmljaWVudHMgb2YgdGhlIHJlZ3Jlc3Npb24gZXF1YXRpb24uIA0KICANCiAgDQoNCmBgYHtyfQ0KbGluZWFyUmVncmVzc2lvbiA9IGZ1bmN0aW9uICh4LHkpIHsNCiAgICBuID0gZGltKHkpWzFdDQogICAgYXNzZXJ0dGhhdDo6YXNzZXJ0X3RoYXQoZGltKHkpWzFdPT1kaW0oeClbMV0pDQogICAgb25lcyA9IHJlcCgxLG4pDQogICAgbSA9IG1hdHJpeChvbmVzLG5jb2wgPSAxKQ0KICAgIG5jID0gZGltKHkpWzJdDQogICAgbSA9IGNiaW5kKG0seCkNCiAgICBjb2xuYW1lcyAobSkgPSBOVUxMDQogICAgbSA9IGFzLm1hdHJpeChtKQ0KICAgICN0cmFuc3Bvc2UNCiAgICB0bSA9IHQobSkNCiAgICB0bS5tID0gdG0gJSolIG0NCiAgICAjIHRoZSBzb2x2ZSBmdW5jdGlvbiBmaW5kcyB0aGUgaW52ZXJzZSBvZiB0aGUgbWF0cml4DQogICAgdG0ubVNvbHZlZCA9IHNvbHZlICh0bS5tKQ0KICAgIHkgPSBhcy5tYXRyaXgoeSkNCiAgICB0bS55ID0gdG0gJSolIHkNCiAgICBjb2VmID0gdG0ubVNvbHZlZCAlKiUgdG0ueQ0KICAgIGNvbG5hbWVzKGNvZWYpID0gYygiQ29lZmZpY2llbnRzIikNCiAgICBjb2VmDQogICAgDQp9DQojIGNhbGxpbmcgZXhhbXBsZQ0KIyByZXN1bHQgPSBsaW5lYXJSZWdyZXNzaW9uKHgseSkNCmBgYA0KV2Ugbm93IHZlcmlmeSB0aGlzIGZ1bmN0aW9uIHdpdGggdGhlIGRpYW1vbmRzIGRhdGFzZXQuIEl0IHJlc2lkZXMgaW4gdGhlIGdncGxvdDIgbGlicmFyeSwgc28gdGhhdCBsaWJyYXJ5IG11c3QgYmUgbG9hZGVkLg0KDQpXZSB3aWxsIHVzZSBjYXJhdCBhbmQgZGVwdGggdG8gcHJlZGljdCBwcmljZS4NCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpoZWFkKHN0cihkaWFtb25kcykpDQojIGJ1aWxkIHNlcGFyYXRlIHggYW5kIHkgbWF0cmljZXMgDQoNCnByaWNlID0gZGlhbW9uZHMkcHJpY2UNCnByaWNlID0gYXMuZGF0YS5mcmFtZShwcmljZSkNCmNhcmF0TmRlcHRoID0gZGlhbW9uZHMkY2FyYXQNCmNhcmF0TmRlcHRoID0gYXMuZGF0YS5mcmFtZShjYXJhdE5kZXB0aCkNCmNhcmF0TmRlcHRoJGRlcHRoID0gZGlhbW9uZHMkZGVwdGgNCg0KY29lZmZpY2llbnRzID0gbGluZWFyUmVncmVzc2lvbihjYXJhdE5kZXB0aCxwcmljZSkNCmNvZWZmaWNpZW50cw0KDQpgYGANCg0KVmVyaWZ5IHRoYXQgdGhlIGNvZWZmaWNpZW50cyBhcmUgdGhlIHNhbWUgYnkgdXNpbmcgdGhlIGJ1aWx0LWluIFIgZnVuY3Rpb24gbG0oKS4NCg0KYGBge3J9DQoNCmxtb2RlbCA9IHN1bW1hcnkobG0ocHJpY2UkcHJpY2UgfiAuLCBkYXRhPSBjYXJhdE5kZXB0aCkpDQpsbW9kZWwkY29lZmZpY2llbnRzDQoNCmBgYA0KDQpTbyBvdXIgZnVuY3Rpb24gYWdyZWVzIHdpdGggdGhlIFIgZnVuY3Rpb24uIFdlIHdpbGwgbm90IGNhbGN1bGF0ZSB0aGUgc3RhbmRhcmQgZXJyb3IgYW5kIHQgdmFsdWVzLCBub3IgdGhlIHByb2JhYmlsaXR5LiBIb3dldmVyLCB3ZSBkbyBoYXZlIGVub291Z2ggaW5mb3JtYXRpb24gdG8gY2FjbHVsYXRlIHIgc3F1YXJlZC4NCg0KWW91ciBjb2RlIGdvZXMgaGVyZS4uDQoNCmBgYHtyfQ0KDQojIGZyb20gdGhlIGxtIGZ1bmN0aW9uDQoNCnByaW50KGxtb2RlbCRyLnNxdWFyZWQpDQoNCmBgYA0KDQoNCkhvdyB3b3VsZCB5b3UgY2FsY3VsYXRlIGl0Pw0KDQo=