# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245 Module 1 Day 3
# ~ Hypoxanthine Data
# ~ from: Analysis of
# ~ Biological Data Ch 17
# ~ Problem 14
# ~ Reference:
# ~ James, R. A., Hoadley, P. A., & Sampson, B. G. (1997).
# ~ Determination of postmortem interval by sampling vitreous
# ~ humour. American Journal of Forensic Medicine and
# ~ Pathology, 18(2), 158-162.
# ~ https://doi.org/10.1097/00000433-199706000-00010
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Estimation of postmortem interval (PMI) by analyses
# of vitreous humor has certain advantages over analyses
# of blood and cerebrospinal fluid (CSF). Certain
# substances, including potassium and hypoxanthine (Hx),
# have been shown to exhibit postmortem increase in
# concentration in vitreous humor in a linear fashion.
# In the present study, potassium and Hx concentrations
# were measured in 100 subjects with known PMIs.
# Simple linear regression analyses were performed on
# the data collected, and new equations for estimation
# of PMI were constructed.
#
# Data Dictionary -- TimeOfDeath.RData
#
# hours -- Hours post mortem (since death)
# hypoxanthine -- Hypoxanthine concentration (micromol/l)
#
## Download and load the data file = TimeOfDeath.RData
download.file("http://www.duke.edu/~sgrambow/crp241data/TimeOfDeath.RData",
destfile = "TimeOfDeath.RData",quiet=TRUE,mode="wb",cacheOK=FALSE)
load("TimeOfDeath.RData")
# Structure of Data
str(TimeOfDeath)
## 'data.frame': 48 obs. of 2 variables:
## $ hours : num 1.5 2.1 2.8 3.9 3.9 3.4 2.9 5.4 7.9 10 ...
## $ hypoxanthine: int 29 28 26 18 11 5 0 3 31 5 ...
# Descriptive Summary of Data
summary(TimeOfDeath)
## hours hypoxanthine
## Min. : 1.50 Min. : 0.00
## 1st Qu.:10.47 1st Qu.: 21.50
## Median :17.70 Median : 52.50
## Mean :20.78 Mean : 67.77
## 3rd Qu.:27.07 3rd Qu.: 97.50
## Max. :81.20 Max. :357.00
hist(TimeOfDeath$hypoxanthine)

boxplot(TimeOfDeath$hypoxanthine)

stripchart(TimeOfDeath$hypoxanthine, method="jitter")

# Fancy Stripchart
stripchart(TimeOfDeath$hypoxanthine,
main="Postmortem Hypoxanthine in the Vitreous Humour",
xlab="concentration (micromol/l)",
ylab="Hypoxanthine ",
method="jitter",
col="blue",
pch=1
)

# Scatter Plot of Hours since death vs Hypoxanthine
plot(TimeOfDeath$hypoxanthine,TimeOfDeath$hours)
# Here is a simple linear Regression
# modeling hours post mortem as a function of
# hypoxanthine concentration.
fit.h <- lm(hours ~ hypoxanthine,data=TimeOfDeath)
summary(fit.h)
##
## Call:
## lm(formula = hours ~ hypoxanthine, data = TimeOfDeath)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.2581 -5.6408 0.0767 7.0838 20.6384
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.82552 1.98866 3.432 0.00128 **
## hypoxanthine 0.20596 0.02165 9.512 1.95e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.299 on 46 degrees of freedom
## Multiple R-squared: 0.663, Adjusted R-squared: 0.6556
## F-statistic: 90.48 on 1 and 46 DF, p-value: 1.949e-12
# model yields slope coefficient for hypoxanthine = 0.21 with p <0.0001
# Interpretation
# For each 1 unit increase in hypoxanthine concentration,
# the mean time since death increases by .21 hours.
# we can use confint to generate confidence interval for slope
# estimate
confint((fit.h))
## 2.5 % 97.5 %
## (Intercept) 2.8225485 10.8284913
## hypoxanthine 0.1623725 0.2495396
# This yields
# 2.5 % 97.5 %
# (Intercept) 2.8225485 10.8284913
# hypoxanthine 0.1623725 0.2495396
#
# so we have a point estimate for the slope
# with 95% confidence interval
# 0.21 (0.16, .25)
#
# here is a basic scatter plot
# with fitted regression line overlay
plot(TimeOfDeath$hypoxanthine,TimeOfDeath$hours)
abline(fit.h)
# Here is a fancier scatter plot with regression line
# and (1) confidence bands and (2) prediction bands
# source code can be found here: https://rpubs.com/Bio-Geek/71339
library(ggplot2)

# Plotting 95% Confidence Interval
ggplot(TimeOfDeath, aes(x=hypoxanthine, y=hours))+
geom_point()+
geom_smooth(method=lm, se=TRUE)

# Plotting 95% Confidence Interval and Prediction Interval
# Notice that the confidence bands get wider for larger values
# of x reflecting the density of observed data points across the x axis
temp_var <- predict(fit.h, interval="prediction")
## Warning in predict.lm(fit.h, interval = "prediction"): predictions on current data refer to _future_ responses
newdata <- cbind(TimeOfDeath,temp_var)
ggplot(newdata, aes(hypoxanthine, hours))+
geom_point() +
geom_line(aes(y=lwr), color = "red", linetype = "dashed")+
geom_line(aes(y=upr), color = "red", linetype = "dashed")+
geom_smooth(method=lm, se=TRUE)

# Generating predictions
# As we have done with previous data, we can
# generate predictions for the model using the
# predict function
# for predicted means with confidence intervals
# say for hypoxanthine=50
predict(fit.h,data.frame(hypoxanthine=50),interval = "confidence")
## fit lwr upr
## 1 17.12332 14.31274 19.9339
# Generates a point estimate and associated
# confidence interval
# fit lwr upr
# 1 17.12332 14.31274 19.9339
# for predicted values with prediction intervals
# say for hypoxanthine=50
predict(fit.h,data.frame(hypoxanthine=50),interval = "predict")
## fit lwr upr
## 1 17.12332 -1.804827 36.05147
# fit lwr upr
# 1 17.12332 -1.804827 36.05147
# Notice how much wider the prediction interval is!
# End of Program