## ggplot2 for plotting
library(ggplot2)
## for unit()
library(gridExtra)
## Define n-th power of x functions (n>=1)
fx <- function(x) x^1.0
fxOneAndHalfPower <- function(x) x^1.5
fxSquared <- function(x) x^2.0
fxTwoAndHalfPower <- function(x) x^2.5
fxCubed <- function(x) x^3.0
## Define n-th power of x functions (n<=1)
fxPointEightPower <- function(x) x^0.8
fxHalfPower <- function(x) x^0.5
fxOneThirdPower <- function(x) x^0.3
fxOneTenthPower <- function(x) x^0.1
## Define negative powers of x functions
fxNegHalfPower <- function(x) x^(-0.5)
fxNegOnePower <- function(x) x^(-1.0)
fxNegTwoPower <- function(x) x^(-2.0)
## Define non-power functions
fxExp <- function(x) exp(x)
fxNegExp <- function(x) exp(-x)
fxLog <- function(x) log(x)
## Define a helper function
stat_fun <- function(fun, mapping) {
stat_function(fun = fun, mapping = mapping, n = 1001)
}
## Provide base for plots
plotBase <- ggplot(data = data.frame(x = -1:3),
mapping = aes(x = x)) +
labs(color = "Functions", linetype = "Functions") +
scale_y_continuous(limit = c(-2,5)) +
theme_bw() +
theme(legend.key = element_blank(),
legend.key.width = unit(1, "cm"))
## n-th power of x functions (n>=1)
plotBase +
stat_fun(fun = fx, mapping = aes(linetype = "x^1.0")) +
stat_fun(fun = fxOneAndHalfPower, mapping = aes(linetype = "x^1.5")) +
stat_fun(fun = fxSquared, mapping = aes(linetype = "x^2.0")) +
stat_fun(fun = fxTwoAndHalfPower, mapping = aes(linetype = "x^2.5")) +
stat_fun(fun = fxCubed, mapping = aes(linetype = "x^3.0"))
## n-th power of x functions (n<=1)
plotBase +
stat_fun(fun = fx, mapping = aes(linetype = "x^1.0")) +
stat_fun(fun = fxPointEightPower, mapping = aes(linetype = "x^0.8")) +
stat_fun(fun = fxHalfPower, mapping = aes(linetype = "x^0.5")) +
stat_fun(fun = fxOneThirdPower, mapping = aes(linetype = "x^0.3")) +
stat_fun(fun = fxOneTenthPower, mapping = aes(linetype = "x^0.1")) +
scale_linetype_manual(values = c("x^1.0" = 1,"x^0.8" = 2,"x^0.5" = 3,"x^0.3" = 4,"x^0.1" = 5),
guide = guide_legend(reverse = TRUE))
## Negative powers of x functions
plotBase +
stat_fun(fun = fxNegHalfPower, mapping = aes(linetype = "x^(-0.5)")) +
stat_fun(fun = fxNegOnePower, mapping = aes(linetype = "x^(-1.0)")) +
stat_fun(fun = fxNegTwoPower, mapping = aes(linetype = "x^(-2.0)"))
## Non-power functions
plotBase +
stat_fun(fun = fxExp, mapping = aes(linetype = "exp(x)")) +
stat_fun(fun = fxNegExp, mapping = aes(linetype = "exp(-x)")) +
stat_fun(fun = fxLog, mapping = aes(linetype = "log(x)"))
## Create data
aidsData <- read.table(header = TRUE, text = "
year yearSince1976 newAidsCases personYears hivRateEstimate
1979 3 0 362 0
1980 4 0 360 100
1981 5 4 355 519
1982 6 10 353 76
1983 7 46 351 728
1984 8 92 349 1744
1985 9 201 348 433
1986 10 329 348 65
1987 11 456 348 1820
1988 12 476 347 111
1989 13 606 347 2013
1990 14 642 348 192
1991 15 791 344 57
1992 16 645 339 119
")
aidsData$personYears <- aidsData$personYears*1000
## Check
print(aidsData, row.names = F)
## year yearSince1976 newAidsCases personYears hivRateEstimate
## 1979 3 0 362000 0
## 1980 4 0 360000 100
## 1981 5 4 355000 519
## 1982 6 10 353000 76
## 1983 7 46 351000 728
## 1984 8 92 349000 1744
## 1985 9 201 348000 433
## 1986 10 329 348000 65
## 1987 11 456 348000 1820
## 1988 12 476 347000 111
## 1989 13 606 347000 2013
## 1990 14 642 348000 192
## 1991 15 791 344000 57
## 1992 16 645 339000 119
plot(hivRateEstimate ~ yearSince1976, data = aidsData, type = "l")
library(ggplot2)
qplot(x = year, y = hivRateEstimate, data = aidsData, geom = "line")
aidsData$yearCat <- cut(aidsData$year,
breaks = c(-Inf,1980,1982,1984,1987,Inf),
labels = c("-1980","1981-1982","1983-1984","1985-1987","1988-"))
## glm1Categorical <- glm(formula = newAidsCases ~ yearCat + offset(log(personYears)),
## family = poisson(link = "log"),
## data = aidsData)
glm1Categorical <- glm(formula = hivRateEstimate ~ yearCat,
family = poisson(link = "log"),
data = aidsData)
newdata <- data.frame(year = seq(from = 1979, to = 1992, by = 0.05))
newdata$yearCat <- cut(newdata$year,
breaks = c(-Inf,1980,1982,1984,1987,Inf),
labels = c("-1980","1981-1982","1983-1984","1985-1987","1988-"))
## newdata <- data.frame(yearCat = levels(aidsData$yearCat), personYears = 100000)
## newdata
out <- predict(glm1Categorical, newdata = newdata)
## out
newdata$predCount <- round(exp(out), 3)
## newdata
ggplot(data = newdata,
mapping = aes(x = year, y = predCount)) +
layer(geom = "line") +
theme_bw() +
theme(legend.key = element_blank())
## Fractional polynomial
glm2Fp <- glm(formula = hivRateEstimate ~ I(year^(1/2)) + year + I(year^(3/2) + I(year^2)),
family = poisson(link = "log"),
data = aidsData)
out <- predict(glm2Fp, newdata = newdata)
## out
newdata$predCount <- round(exp(out), 3)
## newdata
ggplot(data = newdata,
mapping = aes(x = year, y = predCount)) +
layer(geom = "line") +
theme_bw() +
theme(legend.key = element_blank())
## Linear Spline
aidsData <- within(aidsData, {
year1981 <- pmax(year - 1981, 0)
year1983 <- pmax(year - 1983, 0)
year1985 <- pmax(year - 1985, 0)
year1988 <- pmax(year - 1988, 0)
})
aidsData
## year yearSince1976 newAidsCases personYears hivRateEstimate yearCat year1988 year1985 year1983 year1981
## 1 1979 3 0 362000 0 -1980 0 0 0 0
## 2 1980 4 0 360000 100 -1980 0 0 0 0
## 3 1981 5 4 355000 519 1981-1982 0 0 0 0
## 4 1982 6 10 353000 76 1981-1982 0 0 0 1
## 5 1983 7 46 351000 728 1983-1984 0 0 0 2
## 6 1984 8 92 349000 1744 1983-1984 0 0 1 3
## 7 1985 9 201 348000 433 1985-1987 0 0 2 4
## 8 1986 10 329 348000 65 1985-1987 0 1 3 5
## 9 1987 11 456 348000 1820 1985-1987 0 2 4 6
## 10 1988 12 476 347000 111 1988- 0 3 5 7
## 11 1989 13 606 347000 2013 1988- 1 4 6 8
## 12 1990 14 642 348000 192 1988- 2 5 7 9
## 13 1991 15 791 344000 57 1988- 3 6 8 10
## 14 1992 16 645 339000 119 1988- 4 7 9 11
glm2Ls <- glm(formula = hivRateEstimate ~ year + year1981 + year1983 + year1985 + year1988,
family = poisson(link = "log"),
data = aidsData)
newdata <- within(newdata, {
year1981 <- pmax(year - 1981, 0)
year1983 <- pmax(year - 1983, 0)
year1985 <- pmax(year - 1985, 0)
year1988 <- pmax(year - 1988, 0)
})
out <- predict(glm2Ls, newdata = newdata)
## out
newdata$predCount <- round(exp(out), 3)
## newdata
ggplot(data = newdata,
mapping = aes(x = year, y = predCount)) +
layer(geom = "line") +
scale_y_log10() +
theme_bw() +
theme(legend.key = element_blank())
## Qubic Spline
glm2Qs <- glm(formula = hivRateEstimate ~ year + I(year^2) + I(year1981^2) + I(year1983^2) + I(year1985^2) + I(year1988^2),
family = poisson(link = "log"),
data = aidsData)
newdata <- data.frame(year = seq(from = 1979, to = 1992, by = 0.05))
newdata <- within(newdata, {
year1981 <- pmax(year - 1981, 0)
year1983 <- pmax(year - 1983, 0)
year1985 <- pmax(year - 1985, 0)
year1988 <- pmax(year - 1988, 0)
})
out <- predict(glm2Qs, newdata = newdata)
newdata$predCount <- round(exp(out), 3)
ggplot(data = newdata,
mapping = aes(x = year, y = predCount)) +
layer(geom = "line") +
scale_y_log10() +
theme_bw() +
theme(legend.key = element_blank())
## Load library
library(mfp)
## Load data and fix error
data(bodyfat)
bodyfat$height[bodyfat$case==42] <- 69.5 # apparent error
bodyfat <- bodyfat[-which(bodyfat$case==39),] # cp. Royston $amp$ Sauerbrei, 2004
summary(bodyfat)
## case brozek siri density age weight height
## Min. : 1.0 Min. : 0.0 Min. : 0.0 Min. :0.995 Min. :22.0 Min. :118 Min. :64.0
## 1st Qu.: 64.5 1st Qu.:12.8 1st Qu.:12.4 1st Qu.:1.042 1st Qu.:35.5 1st Qu.:159 1st Qu.:68.2
## Median :127.0 Median :19.0 Median :19.2 Median :1.055 Median :43.0 Median :176 Median :70.0
## Mean :126.8 Mean :18.9 Mean :19.1 Mean :1.056 Mean :44.9 Mean :178 Mean :70.3
## 3rd Qu.:189.5 3rd Qu.:24.6 3rd Qu.:25.2 3rd Qu.:1.070 3rd Qu.:54.0 3rd Qu.:197 3rd Qu.:72.2
## Max. :252.0 Max. :45.1 Max. :47.5 Max. :1.109 Max. :81.0 Max. :263 Max. :77.8
## neck chest abdomen hip thigh knee ankle
## Min. :31.1 Min. : 79.3 Min. : 69.4 Min. : 85.0 Min. :47.2 Min. :33.0 Min. :19.1
## 1st Qu.:36.4 1st Qu.: 94.3 1st Qu.: 84.5 1st Qu.: 95.5 1st Qu.:56.0 1st Qu.:37.0 1st Qu.:22.0
## Median :38.0 Median : 99.6 Median : 90.9 Median : 99.3 Median :59.0 Median :38.5 Median :22.8
## Mean :37.9 Mean :100.7 Mean : 92.3 Mean : 99.7 Mean :59.3 Mean :38.5 Mean :23.1
## 3rd Qu.:39.4 3rd Qu.:105.3 3rd Qu.: 99.2 3rd Qu.:103.3 3rd Qu.:62.3 3rd Qu.:39.9 3rd Qu.:24.0
## Max. :43.9 Max. :128.3 Max. :126.2 Max. :125.6 Max. :74.4 Max. :46.0 Max. :33.9
## biceps forearm wrist
## Min. :24.8 Min. :21.0 Min. :15.8
## 1st Qu.:30.2 1st Qu.:27.3 1st Qu.:17.6
## Median :32.0 Median :28.7 Median :18.3
## Mean :32.2 Mean :28.7 Mean :18.2
## 3rd Qu.:34.3 3rd Qu.:30.0 3rd Qu.:18.8
## Max. :39.1 Max. :34.9 Max. :21.4
## siri Percent body fat using Siri's equation: 495/Density - 450
## Fit
mfpDf4 <- mfp(siri ~ fp(age, df = 4, select = 0.1) +
fp(weight, df = 4, select = 0.1) +
fp(height, df = 4, select = 0.1),
family = gaussian, data = bodyfat)
## Show
summary(mfpDf4)
##
## Call:
## glm(formula = siri ~ I((weight/100)^-0.5) + I((height/100)^1) +
## I((age/100)^1), family = gaussian, data = bodyfat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -12.824 -3.720 -0.361 3.779 12.852
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 203.13 14.84 13.69 < 2e-16 ***
## I((weight/100)^-0.5) -123.92 6.90 -17.96 < 2e-16 ***
## I((height/100)^1) -136.90 15.61 -8.77 3.0e-16 ***
## I((age/100)^1) 12.99 2.75 4.72 3.9e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 27.76)
##
## Null deviance: 17320.4 on 250 degrees of freedom
## Residual deviance: 6855.6 on 247 degrees of freedom
## AIC: 1552
##
## Number of Fisher Scoring iterations: 2
newdata <- data.frame(age = 43.00, weight = seq(from = 118.5, to = 262.8, by = 0.5), height = 70.00)
head(newdata)
## age weight height
## 1 43 118.5 70
## 2 43 119.0 70
## 3 43 119.5 70
## 4 43 120.0 70
## 5 43 120.5 70
## 6 43 121.0 70
newdata$predPercentFat <- predict(object = mfpDf4, newdata = newdata)
library(ggplot2)
ggplot(data = newdata,
mapping = aes(x = weight, y = predPercentFat)) +
layer(geom = "line") +
layer(data = bodyfat, geom = "point", mapping = aes(y = siri)) +
theme_bw() +
theme(legend.key = element_blank())