env <- new.env()
load("../data.rda", envir = env)
par(mfrow = c(1, 2))
curve(100 - 50*exp(-2*x), xlim = c(0, 4), ylim = c(50, 100), xlab = "", ylab = "")
title("(a) Exponential Model (13.8)")
curve(10/(1 + 20*exp(-2*x)), xlim = c(0, 4), ylim = c(0, 10), xlab = "", ylab = "")
title("(b) Logistic Model (13.10)")
The choice of starting values is discussed in the text (p 521).
df <- get("CH13TA01", envir = env)
names(df) <- c("index", "days")
fit <- nls(index ~ a * exp(b * days), df, start = c(a = 56.6646, b = -0.03797))
cbind('Patient' = seq(15),
'Days Hospitalized' = df$days,
'Prognostic Index' = df$index)
## Patient Days Hospitalized Prognostic Index
## [1,] 1 2 54
## [2,] 2 5 50
## [3,] 3 7 45
## [4,] 4 10 37
## [5,] 5 14 35
## [6,] 6 19 25
## [7,] 7 26 20
## [8,] 8 31 16
## [9,] 9 34 18
## [10,] 10 38 13
## [11,] 11 45 8
## [12,] 12 52 11
## [13,] 13 53 8
## [14,] 14 60 4
## [15,] 15 65 6
plot(index ~ days, df, pch = 19)
curve(coef(fit)[1] * exp(coef(fit)[2] * x), add = TRUE)
TABLE 13.2 and TABLE 13.3 (pp 522-4)
Below is a function suited to this example. It returns the iteration history under the list item "Estimates": one column per component of the current 'g' plus a column for the SSE. Under "LS" are the final 'g' values alongside the MSE and final SSE. "se" reproduces the matrix for part (c); the standard errors in (b) are the square roots of its diagonal. Also included are the fitted values, which can be matched against fitted(fit), and the Y and D matrices of TABLE 13.2. Note that these are the matrices from the final iteration, not the first; the first iteration can be reproduced manually to match TABLE 13.2 exactly (see the short sketch after the output below).
# Gauss-Newton iteration for nonlinear least squares (13.25); note this masks stats::nlm
nlm <- function(formula, data, g, FUN = function(x, a, b) a * exp(b * x), TOL = 1e-5)
{
  YX  <- model.frame(formula, data)
  x   <- YX[, -1]
  y   <- YX[, 1]
  n   <- length(y)
  SSE <- NULL
  i   <- 1
  G   <- matrix(nrow = 50, ncol = 2)  # iteration history of g
  repeat
  {
    G[i, ] <- g
    Y   <- as.matrix(y - FUN(x, g[1], g[2]))               # residuals (13.25a)
    D   <- cbind(exp(g[2] * x), g[1] * x * exp(g[2] * x))  # Jacobian (13.25b)
    b   <- coef(lm(Y ~ D - 1))                             # LS step (13.25c)
    SSE <- c(SSE, sum(Y^2))
    g   <- g + b
    is.done <- (length(SSE) > 1) && (SSE[i - 1] - SSE[i] < TOL)
    i <- i + 1
    if (is.done) break
  }
  G <- G[!is.na(G[, 1]), ]
  i <- i - 1  # index of the final iteration
  tab <- list()
  tab$Estimates <- cbind('g' = G, 'SSE' = SSE)
  tab$LS <- cbind(G[i, ], rbind(SSE[i] / (n - 2), SSE[i]))  # final g with MSE and SSE
  tab$se <- (SSE[i] / (n - 2)) * solve(t(D) %*% D)          # MSE * (D'D)^-1 (13.32b)
  tab$fitted <- G[i, 1] * exp(G[i, 2] * x)
  tab$Y <- Y
  tab$D <- D
  return(tab)
}
(coefs <- coef(lm(log(index) ~ days, df))) # Linear Transform Fit
## (Intercept) days
## 4.03716 -0.03797
(coefs <- c(exp(coefs[1]), coefs[2])) # Exp. Transformation
## (Intercept) days
## 56.66512 -0.03797
nlm(index ~ days, df, g = coefs)
## $Estimates
## SSE
## [1,] 56.67 -0.03797 56.08
## [2,] 58.56 -0.03953 49.46
## [3,] 58.61 -0.03958 49.46
## [4,] 58.61 -0.03959 49.46
##
## $LS
## [,1] [,2]
## [1,] 58.60653 3.805
## [2,] -0.03959 49.459
##
## $se
## [,1] [,2]
## [1,] 2.167253 -1.782e-03
## [2,] -0.001782 2.929e-06
##
## $fitted
## [1] 54.145 48.082 44.422 39.448 33.671 27.625 20.939 17.179 15.255 13.021
## [11] 9.870 7.481 7.191 5.450 4.472
##
## $Y
## [,1]
## [1,] -0.1454
## [2,] 1.9177
## [3,] 0.5777
## [4,] -2.4480
## [5,] 1.3290
## [6,] -2.6245
## [7,] -0.9387
## [8,] -1.1787
## [9,] 2.7450
## [10,] -0.0210
## [11,] -1.8696
## [12,] 3.5191
## [13,] 0.8095
## [14,] -1.4503
## [15,] 1.5285
##
## $D
## [,1] [,2]
## [1,] 0.9239 108.3
## [2,] 0.8204 240.4
## [3,] 0.7580 311.0
## [4,] 0.6731 394.5
## [5,] 0.5745 471.4
## [6,] 0.4714 524.9
## [7,] 0.3573 544.4
## [8,] 0.2931 532.5
## [9,] 0.2603 518.7
## [10,] 0.2222 494.8
## [11,] 0.1684 444.1
## [12,] 0.1276 389.0
## [13,] 0.1227 381.1
## [14,] 0.0930 327.0
## [15,] 0.0763 290.6
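As noted above, the first iteration of TABLE 13.2 can be reproduced manually. A minimal sketch, evaluating (13.25a-b) once at the starting values g(0) from the linearized fit:
g0.start <- coefs                                            # starting values g(0)
Y0 <- df$index - g0.start[1] * exp(g0.start[2] * df$days)    # Y(0) matrix (13.25a)
D0 <- cbind(exp(g0.start[2] * df$days),
            g0.start[1] * df$days * exp(g0.start[2] * df$days))  # D(0) matrix (13.25b)
coef(lm(Y0 ~ D0 - 1))                                        # first step b(0) (13.25c)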
library(HH) # To test nonconstant error variance with Brown-Forsythe
plot(resid(fit) ~ fitted(fit), xlab = "Fitted Values", ylab = "Residual")
title("(a) Residual Plot against Fitted")
abline(0, 0)
qqnorm(resid(fit), xlab = "Expected Value", ylab = "Residual", main = "")
qqline(resid(fit))
title("(b) Normal Probability Plot")
hov(resid(fit) ~ cut(df$days, 2)) # BF test. P-value = 0.64
##
## hov: Brown-Forsyth
##
## data: resid(fit)
## F = 0.225, df:cut(df$days, 2) = 1, df:Residuals = 13, p-value =
## 0.6431
## alternative hypothesis: variances are not identical
deviance(fit) / df.residual(fit) # (13.31)
## [1] 3.805
vcov(fit) # (13.32b)
## a b
## a 2.167253 -1.782e-03
## b -0.001782 2.929e-06
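As mentioned above, the standard errors in (b) are the square roots of the diagonal of (13.32b):
sqrt(diag(vcov(fit)))  # matches summary(fit)$coef[, 2]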
The plotting limits in the histograms below could be reduced so the output looks more like the figures in the text; without such limits, the plots show the highly skewed tails of the bootstrap distributions. A sketch after the histogram code shows one way to trim them.
library(boot)
boot.coef <- function(data, indices, g)
{
  x <- data[indices, 2]  # days
  y <- data[indices, 1]  # index
  # x and y live in this function's environment, so nls needs no data argument
  mod <- nls(y ~ a * exp(b * x), start = c(a = g[[1]], b = g[[2]]))
  return(coef(mod))
}
# Warnings are produced
(z <- boot(df, boot.coef, R = 1000, g = coef(lm(log(index) ~ days, df))))
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = df, statistic = boot.coef, R = 1000, g = coef(lm(log(index) ~
## days, df)))
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 58.60653 -2.16e-01 1.630648
## t2* -0.03959 9.25e-05 0.001799
# NLS coefficients and Bootstrap CIs
coef(fit)
## a b
## 58.60653 -0.03959
boot.ci(z, index = 1)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = z, index = 1)
##
## Intervals :
## Level Normal Basic
## 95% (55.63, 62.02 ) (56.17, 62.73 )
##
## Level Percentile BCa
## 95% (54.48, 61.04 ) (54.60, 61.16 )
## Calculations and Intervals on Original Scale
boot.ci(z, index = 2)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = z, index = 2)
##
## Intervals :
## Level Normal Basic
## 95% (-0.0432, -0.0362 ) (-0.0436, -0.0363 )
##
## Level Percentile BCa
## 95% (-0.0429, -0.0356 ) (-0.0426, -0.0351 )
## Calculations and Intervals on Original Scale
g0 = expression(g[0]^symbol("\052"))
g1 = expression(g[1]^symbol("\052"))
hist(z$t[, 1], 40, freq = FALSE, xlab = g0, main = "")
title(expression("(a) Histogram of Bootstrap Estimates " * g[0]^symbol("\052")))
hist(z$t[, 2], 40, freq = FALSE, xlab = g1, main = "")
title(expression("(b) Histogram of Bootstrap Estimates " * g[1]^symbol("\052")))
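As mentioned earlier, the plotting limits can be reduced so these histograms look more like the text. A minimal sketch, trimming each x-axis to the central 98% of the replicates (the cutoffs are arbitrary):
hist(z$t[, 1], 40, freq = FALSE, xlim = quantile(z$t[, 1], c(0.01, 0.99)), xlab = g0, main = "")
hist(z$t[, 2], 40, freq = FALSE, xlim = quantile(z$t[, 2], c(0.01, 0.99)), xlab = g1, main = "")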
# Simultaneous Interval Estimation (p 532)
s = sqrt(diag(vcov(fit)))
(coef(fit) + 2.16 * c(lwr = -s, upr = s))[c(1,3, 2,4)]
## lwr.a upr.a lwr.b upr.b
## 55.42667 61.78640 -0.04328 -0.03589
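The hard-coded multiplier 2.16 can be computed directly. A minimal sketch, assuming the text's 90 percent family confidence coefficient with g = 2 Bonferroni intervals:
B <- qt(1 - 0.10 / (2 * 2), df.residual(fit))  # t(0.975; 13) = 2.160
coef(fit) + B * cbind(lwr = -s, upr = s)       # same limits as above, in matrix form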
Note that location is a qualitative variable, but nls cannot handle factors, so it must stay numeric in the model formula. Later, when plotting points by location, it is handy to convert it to a factor inline and use it for the plotting colors (see FIGURE 13.5). The same choice arises in any linear model in R; whether to store a variable as a factor or convert it inline is up to the analyst and their coding practices. A short illustration follows the data preparation below.
df <- get("CH13TA04", envir = env)
names(df) <- c("location", "week", "efficiency")
The use of xtabs lets you see the relative efficiency values for each week by location far more compactly than printing the long-form table. Compare it with print(df).
xtabs(efficiency ~ week + location, df)
## location
## week 0 1
## 1 0.517 0.483
## 2 0.598 0.539
## 3 0.635 0.618
## 5 0.750 0.707
## 7 0.811 0.762
## 10 0.848 0.815
## 15 0.943 0.881
## 20 0.971 0.919
## 30 1.012 0.964
## 40 1.015 0.959
## 50 1.007 0.968
## 60 1.022 0.971
## 70 1.028 0.960
## 80 1.017 0.967
## 90 1.023 0.975
library(boot)
boot.coef <- function(data, indices, g)
{
  y  <- data[indices, 1]  # efficiency
  x1 <- data[indices, 2]  # location
  x2 <- data[indices, 3]  # week
  # y, x1, and x2 live in this function's environment, so nls needs no data argument
  mod <- nls(y ~ a + b*x1 + c*exp(d*x2), start = g)
  return(coef(mod))
}
g <- c(a = 1.025, b = -0.0459, c = -0.5, d = -0.122)
fit <- nls(efficiency ~ a + b*location + c*exp(d*week), df, start = g)
z <- boot(df[c(3, 1:2)], boot.coef, R = 1000, g = g)
cbind(
  'g'   = g,                       # starting values
  '(1)' = summary(fit)$coef[, 1],  # nls estimates
  '(2)' = summary(fit)$coef[, 2],  # nls standard errors
  '(3)' = apply(z$t, 2, mean),     # bootstrap means
  '(4)' = apply(z$t, 2, sd))       # bootstrap standard deviations
## g (1) (2) (3) (4)
## a 1.0250 1.01560 0.003672 1.01589 0.003065
## b -0.0459 -0.04727 0.004109 -0.04756 0.004230
## c -0.5000 -0.55244 0.008157 -0.55099 0.010210
## d -0.1220 -0.13480 0.004360 -0.13416 0.005538
# Residual Plots That Were Not Shown
par(mfrow = c(2, 2))
plot(resid(fit) ~ fitted(fit))
title("Residuals aganist Fitted")
plot(resid(fit) ~ location, df)
title("Residuals against Location")
plot(resid(fit) ~ week, df)
title("Residuals against Week")
qqnorm(resid(fit), main = "")
qqline(resid(fit))
title("Normal Probability Plot")
p <- coef(fit)
plot(efficiency ~ week, df, col = factor(location), pch = 19)
curve(p[1] + p[2]*(0) + p[3] * exp(p[4] * x), to = 90, add = TRUE)  # Location = 0
curve(p[1] + p[2]*(1) + p[3] * exp(p[4] * x), to = 90, add = TRUE)  # Location = 1
Histograms artificially bin a continuous phenomenon. Below each histogram is a kernel density plot of the same information, presented as a continuous curve estimating the density of the data. The estimate can be tuned by changing the kernel or the bandwidth passed to density (see the density help page, and the short example after the loop below).
par(mfcol = c(2,4))
for(i in seq(4)) {
hist(z$t[, i], 50, freq = FALSE, main = "", xlab = paste("g", i, sep = ""))
title(paste("(", letters[i], ")", sep = ""))
plot(density(z$t[, i]), xlab = paste("g", i, sep = ""), main = "")
title(paste("(", letters[i], ")", sep = ""))
}
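For example, a minimal sketch of two such modifications (the kernel and bandwidth choices are arbitrary):
plot(density(z$t[, 1], kernel = "epanechnikov"), main = "Epanechnikov kernel")
plot(density(z$t[, 1], adjust = 2), main = "Doubled bandwidth (adjust = 2)")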
The x-axis scale cannot be the same for every curve plotted. In this notation, when b = 0.1 the range of x must be roughly -50 to 50 for f(x) to approach 0 and 1 as shown in (a).
f <- function(x, a, b) {(1 + exp(-a - b*x))^(-1)}
par(mfrow = c(1, 3), lwd = 2)
curve(f(x, a = 0, b = 0.1), -10, 10, ylim = c(0, 1))
curve(f(x, a = 0, b = 1), -10, 10, add = TRUE, lty = 5)
curve(f(x, a = 0, b = 10), -10, 10, add = TRUE, lty = 3)
title("(a)")
curve(f(x, a = 0, b = -0.1), -10, 10, ylim = c(0, 1))
curve(f(x, a = 0, b = -1), -10, 10, add = TRUE, lty = 5)
curve(f(x, a = 0, b = -10), -10, 10, add = TRUE, lty = 3)
title("(b)")
curve(f(x, a = 5, b = 1), -10, 10, ylim = c(0, 1))
curve(f(x, a = 0, b = 1), -10, 10, add = TRUE, lty = 5)
curve(f(x, a = -5, b = 1), -10, 10, add = TRUE, lty = 3)
title("(c)")
This section is INCOMPLETE. Neural networks are still unclear to me at the time of this writing; when I am better educated, this example will be more complete. Until then, I can only offer my apologies!
df <- get("APPENC09", envir = env)
names(df) <- c("id", "cost", "age", "gender", "intervention", "drugs",
"visits", "complications", "comorbidities", "duration")
library(nnet)
indices  <- sample(seq(nrow(df)), 400)
df.train <- df[indices, ]   # 400 cases used for fitting
df.valid <- df[-indices, ]  # remaining cases held out for validation
# linout = TRUE requests linear output units, needed for a continuous response
fit <- nnet(log(cost) ~ intervention + drugs + comorbidities + complications,
            df.train, size = 5, maxit = 50, linout = TRUE)
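Even so, a minimal sketch of how the held-out cases might be used, assuming mean squared prediction error on the log scale is of interest:
pred <- predict(fit, df.valid)       # predictions for the validation set
mean((log(df.valid$cost) - pred)^2)  # mean squared prediction error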