Chapter 13 – Introduction to Nonlinear Regression and Neural Networks

Load the data sets

env <- new.env()
load("../data.rda", envir = env)

FIGURE 13.1 (p 512)

Plots of Exponential and Logistic Response Functions

par(mfrow = c(1, 2))
curve(100 - 50*exp(-2*x), xlim = c(0, 4), ylim = c(50, 100), xlab = "", ylab = "")
title("(a) Exponential Model (13.8)")

curve(10/(1 + 20*exp(-2*x)), xlim = c(0, 4), ylim = c(0, 10), xlab = "", ylab = "")
title("(b) Logistic Model (13.10)")

plot of chunk example-plots

Input Severely Injured Patients Data

Choice of start values discussed on (p 521).

df <- get("CH13TA01", envir = env)
names(df) <- c("index", "days")
fit <- nls(index ~ a * exp(b * days), df, start = c(a = 56.6646, b = -0.03797))

TABLE 13.1 (p 515)

Data–Severely Injured Patients Example

cbind('Patient'           = seq(15),
      'Days Hospitalized' = df$days,
      'Prognostic Index'  = df$index)

##       Patient Days Hospitalized Prognostic Index
##  [1,]       1                 2               54
##  [2,]       2                 5               50
##  [3,]       3                 7               45
##  [4,]       4                10               37
##  [5,]       5                14               35
##  [6,]       6                19               25
##  [7,]       7                26               20
##  [8,]       8                31               16
##  [9,]       9                34               18
## [10,]      10                38               13
## [11,]      11                45                8
## [12,]      12                52               11
## [13,]      13                53                8
## [14,]      14                60                4
## [15,]      15                65                6

FIGURE 13.2 (p 515)

Scatter Plot and Fitted Nonlinear Regression Function–Severely Injured Patients Example

plot(index ~ days, df, pch = 19)
curve(coef(fit)[1] * exp(coef(fit)[2] * x), add = TRUE)

plot of chunk unnamed-chunk-4

TABLE 13.2 and TABLE 13.3 (p 522-4)

Y(0) and D(0) Matrices–Severely Injured Patients Example

Gauss-Newton Method Iterations and Final Nonlinear Least Squares Estimates–Severely Injured Patients Example

Below is a function suited for this example. It outputs part (a) under the list items “Estimate”. Under “LS” we have a column of the final chosen 'g' and a column for the MSE and final SSE. In “se” is a reproduction of the matrix for part ©. Included are also the fitted values, which can be matched against fitted(fit), and the Y and D matrices for TABLE 13.2. However, these are the final matrices, not the first. The approach here can manually be applied to reproduce TABLE 13.2 exactly. Note, the standard errors in (b) can be obtained from ©

nlm <- function(formula, data, g, FUN = function(x, a, b) a * exp(b * x), TOL = 1e-5)
{
  YX  <- model.frame(formula, data)
  x   <- YX[, -1]
  y   <- YX[, 1]
  n   <- length(y)
  SSE <- NULL
  i   <- 1
  G   <- matrix(nrow = 50, ncol = 2)

  repeat
  {
      G[i, ] = g
      Y   = as.matrix(y - FUN(x, g[1], g[2]))               # (13.25a)
      D   = cbind(exp(g[2] * x), g[1] * x * exp(g[2] * x))  # (13.25b)
      b   = coef(lm(Y ~ D - 1))                             # (13.25c)
      SSE = c(SSE, sum(Y^2))
      g <- g + b
      is.done = ((length(SSE) > 1) && (SSE[i-1] - SSE[i] < TOL))
      i = i + 1
      if (is.done) break
  }

  G <- G[!is.na(G[,1]), ]
  i = i-1

  tab <- list()
  tab$Estimates = cbind('g' = G, 'SSE' = SSE)
  tab$LS        = cbind(G[i, ], rbind(SSE[i-1] / (n-2), SSE[i-1]))
  tab$se        = (SSE[i] / (n-2)) * solve(t(D) %*% D)
  tab$fitted    = G[i, 1] * exp(G[i, 2] * x)
  tab$Y         = Y
  tab$D         = D

  return(tab)
}

(coefs <- coef(lm(log(index) ~ days, df)))  # Linear Transform Fit

## (Intercept)        days 
##     4.03716    -0.03797

(coefs <- c(exp(coefs[1]), coefs[2]))       # Exp. Transformation

## (Intercept)        days 
##    56.66512    -0.03797

nlm(index ~ days, df, g = coefs)

## $Estimates
##                       SSE
## [1,] 56.67 -0.03797 56.08
## [2,] 58.56 -0.03953 49.46
## [3,] 58.61 -0.03958 49.46
## [4,] 58.61 -0.03959 49.46
## 
## $LS
##          [,1]   [,2]
## [1,] 58.60653  3.805
## [2,] -0.03959 49.459
## 
## $se
##           [,1]       [,2]
## [1,]  2.167253 -1.782e-03
## [2,] -0.001782  2.929e-06
## 
## $fitted
##  [1] 54.145 48.082 44.422 39.448 33.671 27.625 20.939 17.179 15.255 13.021
## [11]  9.870  7.481  7.191  5.450  4.472
## 
## $Y
##          [,1]
##  [1,] -0.1454
##  [2,]  1.9177
##  [3,]  0.5777
##  [4,] -2.4480
##  [5,]  1.3290
##  [6,] -2.6245
##  [7,] -0.9387
##  [8,] -1.1787
##  [9,]  2.7450
## [10,] -0.0210
## [11,] -1.8696
## [12,]  3.5191
## [13,]  0.8095
## [14,] -1.4503
## [15,]  1.5285
## 
## $D
##         [,1]  [,2]
##  [1,] 0.9239 108.3
##  [2,] 0.8204 240.4
##  [3,] 0.7580 311.0
##  [4,] 0.6731 394.5
##  [5,] 0.5745 471.4
##  [6,] 0.4714 524.9
##  [7,] 0.3573 544.4
##  [8,] 0.2931 532.5
##  [9,] 0.2603 518.7
## [10,] 0.2222 494.8
## [11,] 0.1684 444.1
## [12,] 0.1276 389.0
## [13,] 0.1227 381.1
## [14,] 0.0930 327.0
## [15,] 0.0763 290.6

FIGURE 13.3 (p 527)

Diagnostic Residual Plots–Severely Injured Patients Example

library(HH)  # To test nonconstant error variance with Brown-Forsythe
plot(resid(fit) ~ fitted(fit), xlab = "Fitted Values", ylab = "Residual")
title("(a) Residual Plot against Fitted")
abline(0, 0)

plot of chunk diag-plots


qqnorm(resid(fit), xlab = "Expected Value", ylab = "Residual", main = "")
qqline(resid(fit))
title("(b) Normal Probability Plot")
abline(0, 0)

plot of chunk diag-plots


hov(resid(fit) ~ cut(df$days, 2))  # BF test. P-value = 0.64

## 
##  hov: Brown-Forsyth
## 
## data:  resid(fit) 
## F = 0.225, df:cut(df$days, 2) = 1, df:Residuals = 13, p-value =
## 0.6431
## alternative hypothesis: variances are not identical

deviance(fit) / df.residual(fit)   # (13.31)

## [1] 3.805

vcov(fit)                          # (13.32b)

##           a          b
## a  2.167253 -1.782e-03
## b -0.001782  2.929e-06

FIGURE 13.4 (p 531)

Bootstrap Sampling Distribution–Severely Injured Patients Example

The plotting limits could be reduced so the output looks more like those in the text. Without those limits, these results show highly skewed portions of the bootstrap results.

library(boot)

boot.coef <- function(data, indices, g)
{
  x   <- data[indices, 2]
  y   <- data[indices, 1]
  mod <- nls(y ~ a * exp(b*x), df, start = c(a = g[[1]], b = g[[2]]))

  return(coef(mod))
}

# Warnings are produced
(z <- boot(df, boot.coef, R = 1000, g = coef(lm(log(index) ~ days, df))))

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = df, statistic = boot.coef, R = 1000, g = coef(lm(log(index) ~ 
##     days, df)))
## 
## 
## Bootstrap Statistics :
##     original    bias    std. error
## t1* 58.60653 -2.16e-01    1.630648
## t2* -0.03959  9.25e-05    0.001799


# NLS coefficients and Bootstrap CIs
coef(fit)

##        a        b 
## 58.60653 -0.03959

boot.ci(z, index = 1)

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = z, index = 1)
## 
## Intervals : 
## Level      Normal              Basic         
## 95%   (55.63, 62.02 )   (56.17, 62.73 )  
## 
## Level     Percentile            BCa          
## 95%   (54.48, 61.04 )   (54.60, 61.16 )  
## Calculations and Intervals on Original Scale

boot.ci(z, index = 2)

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = z, index = 2)
## 
## Intervals : 
## Level      Normal              Basic         
## 95%   (-0.0432, -0.0362 )   (-0.0436, -0.0363 )  
## 
## Level     Percentile            BCa          
## 95%   (-0.0429, -0.0356 )   (-0.0426, -0.0351 )  
## Calculations and Intervals on Original Scale



g0 = expression(g[0]^symbol("\052"))
g1 = expression(g[1]^symbol("\052"))
hist(z$t[, 1], 40, freq = FALSE, xlab = g0, main = "")
title(expression("(a) Histogram of Bootstrap Estimates " * g[0]^symbol("\052")))

plot of chunk boot


hist(z$t[, 2], 40, freq = FALSE, xlab = g1, main = "")
title(expression("(b) Histogram of Bootstrap Estimates " * g[1]^symbol("\052")))

plot of chunk boot


# Simultaneous Interval Estimation (p 532)
s = sqrt(diag(vcov(fit)))
(coef(fit) + 2.16 * c(lwr = -s, upr = s))[c(1,3, 2,4)]

##    lwr.a    upr.a    lwr.b    upr.b 
## 55.42667 61.78640 -0.04328 -0.03589

Input Learning Curve Data

It should be noted that location is a qualitative factor. However, you cannot use factors in nls. However, later when plotting points of different factors, it is a handy utility to treat this variable as a factor when you specify it as the colors for the plot (see FIGURE 13.5). In that case, it is explicitly converted to a factor inline. This is also a possible choice when doing any linear model in R. The choice of storing or inline converting between factor and numeric (or character) is up to the analyst and their coding practices.

df <- get("CH13TA04", envir = env)
names(df) <- c("location", "week", "efficiency")

TABLE 13.4 (p 534)

Data–Learning Curve Example

The use of xtabs lets you see the relative efficiency values of each week by location much more efficiently than merely printing out the long form table. Compare it with print(df).

xtabs(efficiency ~ week + location, df)

##     location
## week     0     1
##   1  0.517 0.483
##   2  0.598 0.539
##   3  0.635 0.618
##   5  0.750 0.707
##   7  0.811 0.762
##   10 0.848 0.815
##   15 0.943 0.881
##   20 0.971 0.919
##   30 1.012 0.964
##   40 1.015 0.959
##   50 1.007 0.968
##   60 1.022 0.971
##   70 1.028 0.960
##   80 1.017 0.967
##   90 1.023 0.975

TABLE 13.5 (p 535)

Nonlinear Least Squares Estimates and Standard Deviations and Bootstrap Results–Learning Curve Example

library(boot)
boot.coef <- function(data, indices, g) 
{
  y   <- data[indices, 1]
  x1  <- data[indices, 2]
  x2  <- data[indices, 3]
  mod <- nls(y ~ a  + b*x1 + c*exp(d*x2), start = g)

  return(coef(mod))
}

g   <- c(a = 1.025, b = -0.0459, c = -0.5, d = -0.122)
fit <- nls(efficiency ~ a + b*location + c*exp(d*week), df, start = g)
z   <- boot(df[c(3, 1:2)], boot.coef, R = 1000, g = g)

cbind(
  'g'   = g,
  '(1)' = summary(fit)$coef[, 1],
  '(2)' = summary(fit)$coef[, 2],
  '(3)' = apply(z$t, 2, mean),
  '(4)' = apply(z$t, 2, sd))

##         g      (1)      (2)      (3)      (4)
## a  1.0250  1.01560 0.003672  1.01589 0.003065
## b -0.0459 -0.04727 0.004109 -0.04756 0.004230
## c -0.5000 -0.55244 0.008157 -0.55099 0.010210
## d -0.1220 -0.13480 0.004360 -0.13416 0.005538


# Residual Plots That Were Not Shown
par(mfrow = c(2, 2))
plot(resid(fit) ~ fitted(fit))
title("Residuals aganist Fitted")

plot(resid(fit) ~ location, df)
title("Residuals against Location")

plot(resid(fit) ~ week, df)
title("Residuals against Week")

qqnorm(resid(fit), main = "")
qqline(resid(fit))
title("Normal Probability Plot")

plot of chunk unnamed-chunk-8

FIGURE 13.5 (p 534)

Scatter Plot and Fitted Nonlinear Regression Functions–Learning Curve Example

p <- coef(fit)
plot(efficiency ~ week, df, col = factor(location), pch = 19)

curve(p[1] + p[2]*(0) + p[3] * exp(p[4] * x), to = 90, add = T)  # Location = 0
curve(p[1] + p[2]*(1) + p[3] * exp(p[4] * x), to = 90, add = T)  # Location = 1

plot of chunk unnamed-chunk-9

FIGURE 13.6 (p 536)

Histograms of Bootstrap Sampling Distributions–Learning Curve Example

Histograms suffer from showing continuous phenomena by artificially binning them. Included below each histogram is a kernel density plot of the same information, presented as a continuous plot, demonstrating the density distribution of the data. This approach can be modified by altering the kernel density function. See the density help files for more details.

par(mfcol = c(2,4))
for(i in seq(4)) {
  hist(z$t[, i], 50, freq = FALSE, main = "", xlab = paste("g", i, sep = ""))
  title(paste("(", letters[i], ")", sep = ""))

  plot(density(z$t[, i]), xlab = paste("g", i, sep = ""), main = "")
  title(paste("(", letters[i], ")", sep = ""))
}

plot of chunk boot-hist

FIGURE 13.7 (p 539)

Various Logistic Activation Functions for Single Predictor

The scale cannot possibly be the same for all lines plotted. When, on this notation, b = 0.1 the range of x values needs to be around -50:50 to see f(x) values near 0 and 1 as shown in (a).

f <- function(x, a, b) {(1 + exp(-a - b*x))^(-1)}

par(mfrow = c(1, 3), lwd = 2)
curve(f(x, a =  0, b = 0.1), -10, 10, ylim = c(0, 1))
curve(f(x, a =  0, b =   1), -10, 10, add = TRUE, lty = 5)
curve(f(x, a =  0, b =  10), -10, 10, add = TRUE, lty = 3)
title("(a)")

curve(f(x, a =  0, b = -0.1), -10, 10, ylim = c(0, 1))
curve(f(x, a =  0, b =   -1), -10, 10, add = TRUE, lty = 5)
curve(f(x, a =  0, b =  -10), -10, 10, add = TRUE, lty = 3)
title("(b)")

curve(f(x, a =  5, b = 1), -10, 10, ylim = c(0, 1))
curve(f(x, a =  0, b = 1), -10, 10, add = TRUE, lty = 5)
curve(f(x, a = -5, b = 1), -10, 10, add = TRUE, lty = 3)
title("(c)")

plot of chunk log-plot

Input Ischemic Heart Disease (IHD) Data

This section is INCOMPLETE. Neural Networks are still unclear to me at the time of this writing. When I am more educated, this example will be more complete. Until then, I can only offer my apologies!

df <- get("APPENC09", envir = env)
names(df) <- c("id", "cost", "age", "gender", "intervention", "drugs", 
               "visits", "complications", "comorbidities", "duration")

library(nnet)
indices  <- sample(seq(nrow(df)), 400)
df.test  <- df[indices, ]
df.valid <- df[-indices, ]
fit <- nnet(log(cost) ~ intervention + drugs + comorbidities + complications, df.test, size = 5, maxit = 50)