Linear Regression in R

Sally Chen

9/13/2018

Slide

http://rpubs.com/sallychen/419792

Import Data

# load useful libraries
library(readxl)
Ritvars <- read_excel("Ritvars.xls")
head(Ritvars) #print some rows
##   ID TEST1 TEST2 IMPROVE DOSAGE DRUGDUM FEMALE AGE INTERVAL
## 1  1    75   100      25  0.452       1      0 108    0.592
## 2  2    80    80       0  0.550       1      1  90    0.329
## 3  3    80    70     -10  0.508       1      1 108    0.362
## 4  4    80    90      10  0.478       1      0 138    0.592
## 5  5    75    75       0  0.423       1      0  87    0.822
## 6  6    90   100      10  0.452       1      0 132    0.690
nrow(Ritvars) # number of rows
## [1] 64
ncol(Ritvars) # number of columns
## [1] 9
colnames(Ritvars) # names of columns
## [1] "ID"       "TEST1"    "TEST2"    "IMPROVE"  "DOSAGE"   "DRUGDUM" 
## [7] "FEMALE"   "AGE"      "INTERVAL"
head(Ritvars$DOSAGE) # $ to access column with column name
## [1] 0.452 0.550 0.508 0.478 0.423 0.452
summary(Ritvars$DOSAGE) # summary statistics on DOSAGE column
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.2035  0.2573  0.5088  0.7100
table(Ritvars$FEMALE) # table function to count females/males
## 
##  0  1 
## 47 17

Research Question

Does treatment effect vary across female and male?

For each individual i,

\[Improvement_i = \theta_1*Dosage_i + \theta_2*Dosage_i*Female_i + \theta_3\] - Expression in matrix algebra \[Y = X \Theta + e\]

\[X = \begin{bmatrix} Dosage_1 & Dosage_{1}*Female_{1} & 1\\ Dosage_2 & Dosage_{2}*Female_{2} & 1 \\ \dots\\ Dosage_N & Dosage_{N}*Female_{N} & 1 \end{bmatrix} \]

\[Y = \begin{bmatrix} Improvement_1\\ Improvement_2 \\ \dots \\ Improvement_N \end{bmatrix}\]

\[\Theta =\begin{bmatrix} \theta_1\\ \theta_2 \\ \theta_3 \end{bmatrix}\]

\[e= \begin{bmatrix} e_1\\ e_2 \\ \dots \\ e_N \end{bmatrix}\]

\[\hat{e} = Y - X\hat{\theta_{OLS}}\]

\[s^2=\frac{\hat{e}'\hat{e}}{(N-K)}\]

\[s.e(\hat{\theta_{OLS}}) = diag(s^2((X'X)^{-1}))^{1/2}\]

Matrix in R

Constructing a matrix

matrix(data=1:6,nrow = 3,ncol = 2)
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
a1 = 1:3
a2 = 4:6
cbind(a1,a2)
##      a1 a2
## [1,]  1  4
## [2,]  2  5
## [3,]  3  6
a1 = 1:3
a2 = 4:6
rbind(a1,a2)
##    [,1] [,2] [,3]
## a1    1    2    3
## a2    4    5    6

Accesing data from matrix objects

Demo: Access data from matrix objects

a = matrix(1:6,nrow=3,ncol=2) #create a 3*2 matrix
a
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
a[1,1] # access the first cell
## [1] 1
a[1,] # access the first row
## [1] 1 4
a[,2] # access the second column
## [1] 4 5 6

Scalar & Matrix Calculations

a
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
a+1  # add 1 to each element of the matrix
##      [,1] [,2]
## [1,]    2    5
## [2,]    3    6
## [3,]    4    7
a*2  # multiply 2 to each element of the matrix
##      [,1] [,2]
## [1,]    2    8
## [2,]    4   10
## [3,]    6   12

Elementwise Matrix Calculations

a + a  # element-wise addition
##      [,1] [,2]
## [1,]    2    8
## [2,]    4   10
## [3,]    6   12
a - a  # element-wise addition
##      [,1] [,2]
## [1,]    0    0
## [2,]    0    0
## [3,]    0    0
a * a
##      [,1] [,2]
## [1,]    1   16
## [2,]    4   25
## [3,]    9   36

Transpose & Matrix algebric multiplication

a
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
dim(a) # 3*2 matrix
## [1] 3 2
t(a)     
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
dim(t(a)) #2*3 matrix
## [1] 2 3
a   # 3*2 dimension
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
t(a)  # 2*3 dimension
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
a %*% t(a) # 3*3 dimension
##      [,1] [,2] [,3]
## [1,]   17   22   27
## [2,]   22   29   36
## [3,]   27   36   45
t(a) %*% a # 2*2 dimension
##      [,1] [,2]
## [1,]   14   32
## [2,]   32   77

Run the regression with matrix algebra

N = nrow(Ritvars) # number of observations
X = matrix(0,nrow=N,ncol=3) #innitialize matrix X, N rows, 3 cols
Y = matrix(0,nrow=N,ncol=1) #innitialize matrix Y
X[,1] = Ritvars$DOSAGE # fill in first col: dosage
X[,2] = Ritvars$DOSAGE*Ritvars$FEMALE # fill in second col: interaction of dosage*female
X[,3] = 1 # fourth col: intercept 1
Y = Ritvars$IMPROVE # dependent variable: Improve
theta = solve(t(X)%*%X)%*%(t(X)%*%Y)
theta
##             [,1]
## [1,]  16.2156894
## [2,] -14.7050700
## [3,]   0.2502594
error = Y-X%*%theta # return a vector of residuals
head(error) # look at the residuals
##            [,1]
## [1,]  17.420249
## [2,]  -1.081100
## [3,] -11.017654
## [4,]   1.998641
## [5,]  -7.109496
## [6,]   2.420249
sample_error = sum(error^2)/(nrow(Ritvars)-3) # sum of residuals
sd_error = sqrt(diag(sample_error*(solve(t(X)%*%X)))) # vector of std.error
sd_error
## [1] 6.161655 8.979842 2.069824
t = theta/sd_error  # t statistics
p = 2*pt(-abs(t),df=N-3) # p statistics
p[2] # the p value for the interaction term
## [1] 0.1066615