Multivariate Statistical Analysis

breadth	height	length	nasal_length	period
131	138	89	49	1
125	131	92	48	1
131	132	99	50	1
119	132	96	44	1
136	143	100	54	1
138	137	89	56	1

a. Construct a one-way MANOVA of the Egyptian skull data. State you null hypothesis, alternative hypothesis, conclusion of the test(Wilk’s). Use \(\alpha = 0.05\). You can use manova() function.

\(H_{0}: \mu_{1} = \mu_{2} = ... = \mu_{n} = 0\)
\(H_{a}:\) at least one \(\mu_{i} \neq 0\) for i = 1, 2, …, n
\(\alpha = 0.05\)

crit <- qf(0.95, df1 = 8, df2 = 168)
t1 <- manova(as.matrix(df1[,1:4])~factor(df1[,5]))
wilksum <- summary(t1, "Wilks"); wilksum

##                  Df  Wilks approx F num Df den Df  Pr(>F)  
## factor(df1[, 5])  2 0.8301   2.0491      8    168 0.04358 *
## Residuals        87                                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

aovsum <- summary.aov(t1); aovsum

##  Response breadth :
##                  Df Sum Sq Mean Sq F value  Pr(>F)  
## factor(df1[, 5])  2  150.2  75.100  3.6595 0.02979 *
## Residuals        87 1785.4  20.522                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response height :
##                  Df Sum Sq Mean Sq F value Pr(>F)
## factor(df1[, 5])  2   20.6  10.300  0.4657 0.6293
## Residuals        87 1924.3  22.118               
## 
##  Response length :
##                  Df  Sum Sq Mean Sq F value  Pr(>F)  
## factor(df1[, 5])  2  190.29  95.144  3.8447 0.02512 *
## Residuals        87 2153.00  24.747                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nasal_length :
##                  Df Sum Sq Mean Sq F value Pr(>F)
## factor(df1[, 5])  2   2.02  1.0111  0.1047 0.9007
## Residuals        87 840.20  9.6575

f <- unlist(wilksum[4])[5]
pval <- unlist(wilksum[4])[11]

Critical Value: F-Statistic > 1.9938839
Conclusion: With a p-value of 0.0435825, we reject the null with statistically significant evidence at the confidence level \(\alpha = 0.05\). That is, the one-way MANOVA results suggest that a time effect is at play.

b. (STAT488) Are the usual MANOVA assumptions realistic for these data? Explain.

alpha <- 0.05
n <- nrow(df1)
p1 <- df1 %>% filter(period == 1) %>% select(1:4)
n1 <- length(p1)
p2 <- df1 %>% filter(period == 2) %>% select(1:4)
n2 <- length(p2)
p3 <- df1 %>% filter(period == 3) %>% select(1:4)
n3 <- length(p3)

xbar1 <- colMeans(p1)
xbar2 <- colMeans(p2)
xbar3 <- colMeans(p3)
xbar <- (n1*xbar1 + n2*xbar2 + n3*xbar3)/(n)

s1 <- cov(p1); s1.inv <- solve(s1)
s2 <- cov(p2); s2.inv <- solve(s2)
s3 <- cov(p3); s3.inv <- solve(s3)

# Checking Normality for Time Period 1
chisq1 <- diag(t(t(p1) - xbar1) %*% s1.inv %*% (t(p1) - xbar1))
ggplot(p1) + 
  geom_qq(aes(sample = chisq1), 
          distribution = qchisq, 
          dparams = c(1 - alpha, 4)) + 
  geom_qq_line(aes(sample = chisq1), 
               distribution = qchisq, 
               dparams = c(1 - alpha, 4),
               color = "red") +
  labs(x = "Theoretical Quantiles", 
       y = "Sample Quantiles", 
       title = "QQ-Plot for Time Period 1")

# Checking Normality for Time Period 2
chisq2 <- diag(t(t(p2) - xbar2) %*% s2.inv %*% (t(p2) - xbar2))
ggplot(p2) + 
  geom_qq(aes(sample = chisq2), 
          distribution = qchisq, 
          dparams = c(1 - alpha, 4)) + 
  geom_qq_line(aes(sample = chisq2), 
               distribution = qchisq, 
               dparams = c(1 - alpha, 4),
               color = "red") +
  labs(x = "Theoretical Quantiles", 
       y = "Sample Quantiles", 
       title = "QQ-Plot for Time Period 2")

# Checking Normality for Time Period 3
chisq3 <- diag(t(t(p3) - xbar3) %*% s3.inv %*% (t(p3) - xbar3))
ggplot(p3) + 
  geom_qq(aes(sample = chisq3), 
          distribution = qchisq, 
          dparams = c(1 - alpha, 4)) + 
  geom_qq_line(aes(sample = chisq3), 
               distribution = qchisq, 
               dparams = c(1-alpha, 4),
               color = "red") +
  labs(x = "Theoretical Quantiles", 
       y = "Sample Quantiles", 
       title = "QQ-Plot for Time Period 3")

It seems as if the data violate the MANOVA assumption of normality for at least one of the three time periods according to the qq-plots. Based on this violation, the conclusions derived from MANOVA may not be valid for this dataset. Additionally, the qqplots show possible outliers at the far ends of the distributions.

(2) (6.33 JW) In one experiment involving remote sensing, the spectral reflectance of three species 1-year-old seedlings was measured at various wavelengths at three different times (Julian day 150 [1], Julian day 235 [2], and Julian day 320 [3]) during the growing season. The species of seedlings used were sitka spruce (SS), Japanese larch (JL), and lodgepole pine (LP). Two of the variables measured were:

X1 = percent spectral reflectance at wavelength 560 nm (green);
X2 = percent spectral reflectance at wavelength 720 nm (near infrared)

kable(head(df2))

green	infrared	species	season	rep
9.33	19.14	SS	1	1
8.74	19.55	SS	1	2
9.31	19.24	SS	1	3
8.27	16.37	SS	1	4
10.22	25.00	SS	2	1
10.13	25.32	SS	2	2

a. Perform a two-factor MANOVA using the data. Test for a species effect, a time effect and species-time interaction. Use \(\alpha = 0.05\). You can use manova() function.

\(H_{0}:\) Species, season & species*season interaction effects are nonexistent.
\(H_{a}:\) Some species, seasonal or interaction effect exists. \(\alpha = 0.05\)

t2 <- manova(as.matrix(df2[,1:2]) ~ cbind(factor(df2[,4]), df2[,3]))
wilksum <- summary(t2,"Wilks"); wilksum

##                                   Df   Wilks approx F num Df den Df
## cbind(factor(df2[, 4]), df2[, 3])  2 0.30214   13.108      4     64
## Residuals                         33                               
##                                          Pr(>F)    
## cbind(factor(df2[, 4]), df2[, 3]) 0.00000007433 ***
## Residuals                                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

hotsum <- summary(t2, "Hotelling-Lawley"); hotsum

##                                   Df Hotelling-Lawley approx F num Df
## cbind(factor(df2[, 4]), df2[, 3])  2           2.2412   17.369      4
## Residuals                         33                                 
##                                   den Df         Pr(>F)    
## cbind(factor(df2[, 4]), df2[, 3])     62 0.000000001318 ***
## Residuals                                                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

pillsum <- summary(t2, "Pillai"); pillsum

##                                   Df  Pillai approx F num Df den Df
## cbind(factor(df2[, 4]), df2[, 3])  2 0.71856   9.2522      4     66
## Residuals                         33                               
##                                       Pr(>F)    
## cbind(factor(df2[, 4]), df2[, 3]) 0.00000536 ***
## Residuals                                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

pval.wilksum <- unlist(wilksum[4])[11]
pval.hotsum <- unlist(hotsum[4])[11]
pval.pillsum <- unlist(pillsum[4])[11]

Conclusion: With p-values of 0.0000001, 0, 0.0000054 for their resepctive Wilks, Hotelling-Lawley & Pillai tests, we reject the null hypotheses of each test with statistically significant evidence at the confidence level \(\alpha = 0.05\) for each test. That is, the two-factor MANOVA results suggest that a seasonal effect, a species effect, and an interaction effect between season & species are at play and affecting spectral reflectance.

b. Foresters are particularly interested in the interaction of species and time. Does interaction show up for one variable but not for the other? Check by running variate two–factor ANOVA for each of the two responses X1 and X2. You can use anova() and lm() function.

fit <- lm(df2$green ~ df2$species*factor(df2$season))
anova(fit)

## Analysis of Variance Table
## 
## Response: df2$green
##                                Df  Sum Sq Mean Sq F value
## df2$species                     2  965.18  482.59 169.973
## factor(df2$season)              2 1275.25  637.62 224.578
## df2$species:factor(df2$season)  4  795.81  198.95  70.073
## Residuals                      27   76.66    2.84        
##                                               Pr(>F)    
## df2$species                    0.0000000000000005027 ***
## factor(df2$season)             < 0.00000000000000022 ***
## df2$species:factor(df2$season) 0.0000000000000734140 ***
## Residuals                                               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

fit <- lm(df2$infrared ~ df2$species*factor(df2$season))
anova(fit)

## Analysis of Variance Table
## 
## Response: df2$infrared
##                                Df Sum Sq Mean Sq F value         Pr(>F)
## df2$species                     2 2026.9 1013.43 15.4622 0.000033479587
## factor(df2$season)              2 5573.8 2786.90 42.5207 0.000000004537
## df2$species:factor(df2$season)  4  193.5   48.39  0.7383         0.5741
## Residuals                      27 1769.6   65.54                       
##                                   
## df2$species                    ***
## factor(df2$season)             ***
## df2$species:factor(df2$season)    
## Residuals                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Based on the results of the two factor ANOVAs, interaction between predictor variables occurs for green light but not for near infrared light.

(3) (7.25 JW) Amitriptyline is prescribed by some physicians as an antidepressant. However,

there are also conjectured side effects that seem to be related to the use of the drug: irregular heartbeat, abnormal blood pressures, and irregular waves on the electrocardiogram among other things. The two response variables are

Y 1 = Total TCAD plasma level (TOT);
Y 2 = Amount of amitriptyline present in TCAD plasma level (AMI).
Z1 = Gender: 1 if female, 0 if male (GEN);
Z2 = Amount of antidepressants taken at time of overdose (AMT);
Z3 = PR wave measurement (PR;
Z4 = Diastolic blood pressure (DIAP);
Z5 = QRS wave measurement(QRS).

kable(head(df3))

plasma	ami	gender	amt	pri	diap	qrs
3389	3149	1	7500	220	0	140
1101	653	1	1975	200	0	100
1131	810	0	3600	205	60	111
596	448	1	675	160	60	120
896	844	1	750	185	70	83
1767	1450	1	2500	180	60	80

a. Perform a regression analysis using only the first response Y 1. State your model and construct a 95% prediction interval for the Total TCAD for z1 = 1, z2 = 1200, z3 = 140, z4 = 70, and z5 = 85.

# Fitting Model
fit.plasma <- lm(plasma ~ factor(gender) + amt + pri + diap + qrs, data = df3)
fit.plasma; summary(fit.plasma)

## 
## Call:
## lm(formula = plasma ~ factor(gender) + amt + pri + diap + qrs, 
##     data = df3)
## 
## Coefficients:
##     (Intercept)  factor(gender)1              amt              pri  
##      -2879.4782         675.6508           0.2849          10.2721  
##            diap              qrs  
##          7.2512           7.5982

## 
## Call:
## lm(formula = plasma ~ factor(gender) + amt + pri + diap + qrs, 
##     data = df3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -399.2 -180.1    4.5  164.1  366.8 
## 
## Coefficients:
##                    Estimate  Std. Error t value Pr(>|t|)    
## (Intercept)     -2879.47825   893.26033  -3.224 0.008108 ** 
## factor(gender)1   675.65078   162.05563   4.169 0.001565 ** 
## amt                 0.28485     0.06091   4.677 0.000675 ***
## pri                10.27213     4.25489   2.414 0.034358 *  
## diap                7.25117     3.22516   2.248 0.046026 *  
## qrs                 7.59824     3.84887   1.974 0.074006 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 281.2 on 11 degrees of freedom
## Multiple R-squared:  0.8871, Adjusted R-squared:  0.8358 
## F-statistic: 17.29 on 5 and 11 DF,  p-value: 0.00006983

c.plasma <- round(fit.plasma$coefficients, digits = 4)

# Evaluating Model
anova(fit.plasma)

## Analysis of Variance Table
## 
## Response: plasma
##                Df  Sum Sq Mean Sq F value     Pr(>F)    
## factor(gender)  1  288658  288658  3.6497    0.08248 .  
## amt             1 5616926 5616926 71.0179 0.00000397 ***
## pri             1  341134  341134  4.3131    0.06204 .  
## diap            1  280973  280973  3.5525    0.08613 .  
## qrs             1  308241  308241  3.8973    0.07401 .  
## Residuals      11  870008   79092                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

autoplot(fit.plasma, which = 1:6, ncol = 3, label.size = 3)

ggplot(fit.plasma, aes(x = fit.plasma$residuals)) + geom_histogram(bins = 5)

# Prediction Interval
pred.plasma <- data.frame(gender = 1, amt = 1200, pri = 140, diap = 70, qrs = 85)
PI.95.plas <- predict(fit.plasma, newdata = pred.plasma, interval = 'prediction')
dimnames(PI.95.plas)[[2]] <- list("Estimate", "Lower Limit", "Upper Limit")
kable(PI.95.plas)

Estimate	Lower Limit	Upper Limit
729.5248	41.34785	1417.702

Model: \(Y_{1} = -2879.4782 + 675.6508Male + 0.2849amt + 10.2721pri + 7.2512diap + 7.5982qrs\)

b. Repeat Part (a) using the second response Y 2.

# Fitting Model
fit.ami <- lm(ami ~ factor(gender) + amt + pri + diap + qrs, data = df3)
fit.ami; summary(fit.ami)

## 
## Call:
## lm(formula = ami ~ factor(gender) + amt + pri + diap + qrs, data = df3)
## 
## Coefficients:
##     (Intercept)  factor(gender)1              amt              pri  
##      -2728.7085         763.0298           0.3064           8.8962  
##            diap              qrs  
##          7.2056           4.9871

## 
## Call:
## lm(formula = ami ~ factor(gender) + amt + pri + diap + qrs, data = df3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -373.85 -247.29  -83.74  217.13  462.72 
## 
## Coefficients:
##                    Estimate  Std. Error t value Pr(>|t|)    
## (Intercept)     -2728.70854   928.84655  -2.938 0.013502 *  
## factor(gender)1   763.02976   168.51169   4.528 0.000861 ***
## amt                 0.30637     0.06334   4.837 0.000521 ***
## pri                 8.89620     4.42440   2.011 0.069515 .  
## diap                7.20556     3.35364   2.149 0.054782 .  
## qrs                 4.98705     4.00220   1.246 0.238622    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 292.4 on 11 degrees of freedom
## Multiple R-squared:  0.8764, Adjusted R-squared:  0.8202 
## F-statistic:  15.6 on 5 and 11 DF,  p-value: 0.0001132

c.ami <- fit.ami$coefficients

# Evaluating Model
anova(fit.ami)

## Analysis of Variance Table
## 
## Response: ami
##                Df  Sum Sq Mean Sq F value      Pr(>F)    
## factor(gender)  1  532382  532382  6.2253     0.02977 *  
## amt             1 5457338 5457338 63.8143 0.000006623 ***
## pri             1  227012  227012  2.6545     0.13153    
## diap            1  320151  320151  3.7436     0.07913 .  
## qrs             1  132786  132786  1.5527     0.23862    
## Residuals      11  940709   85519                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

autoplot(fit.ami, which = 1:6, ncol = 3, label.size = 3)

ggplot(fit.ami, aes(x = fit.ami$residuals)) + geom_histogram(bins = 5)

# Prediction Interval
pred.ami <- data.frame(gender = 1, amt = 1200, pri = 140, diap = 70, qrs = 85)
PI.95.ami <- predict(fit.ami, newdata = pred.ami, interval = 'prediction')
dimnames(PI.95.ami)[[2]] <- list("Estimate", "Lower Limit", "Upper Limit")
kable(PI.95.ami)

Estimate	Lower Limit	Upper Limit
575.7255	-139.8674	1291.318

Model: \(Y_{2} = -2728.7085444 + 763.0297617Male + 0.3063734amt + 8.8961977pri + 7.2055597diap + 4.9870508qrs\)

c. Perform a multivariate multiple regression analysis using both responses Y 1 and Y 2.

fit.plasmi <- lm(as.matrix(df3[,1:2]) ~ factor(gender) + amt + pri + diap + qrs , data = df3)
plasmi.sum <- summary(fit.plasmi); plasmi.sum

## Response plasma :
## 
## Call:
## lm(formula = plasma ~ factor(gender) + amt + pri + diap + qrs, 
##     data = df3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -399.2 -180.1    4.5  164.1  366.8 
## 
## Coefficients:
##                    Estimate  Std. Error t value Pr(>|t|)    
## (Intercept)     -2879.47825   893.26033  -3.224 0.008108 ** 
## factor(gender)1   675.65078   162.05563   4.169 0.001565 ** 
## amt                 0.28485     0.06091   4.677 0.000675 ***
## pri                10.27213     4.25489   2.414 0.034358 *  
## diap                7.25117     3.22516   2.248 0.046026 *  
## qrs                 7.59824     3.84887   1.974 0.074006 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 281.2 on 11 degrees of freedom
## Multiple R-squared:  0.8871, Adjusted R-squared:  0.8358 
## F-statistic: 17.29 on 5 and 11 DF,  p-value: 0.00006983
## 
## 
## Response ami :
## 
## Call:
## lm(formula = ami ~ factor(gender) + amt + pri + diap + qrs, data = df3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -373.85 -247.29  -83.74  217.13  462.72 
## 
## Coefficients:
##                    Estimate  Std. Error t value Pr(>|t|)    
## (Intercept)     -2728.70854   928.84655  -2.938 0.013502 *  
## factor(gender)1   763.02976   168.51169   4.528 0.000861 ***
## amt                 0.30637     0.06334   4.837 0.000521 ***
## pri                 8.89620     4.42440   2.011 0.069515 .  
## diap                7.20556     3.35364   2.149 0.054782 .  
## qrs                 4.98705     4.00220   1.246 0.238622    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 292.4 on 11 degrees of freedom
## Multiple R-squared:  0.8764, Adjusted R-squared:  0.8202 
## F-statistic:  15.6 on 5 and 11 DF,  p-value: 0.0001132

man.plas <- Manova(fit.plasmi); man.plas

## 
## Type II MANOVA Tests: Pillai test statistic
##                Df test stat approx F num Df den Df   Pr(>F)   
## factor(gender)  1   0.65521   9.5015      2     10 0.004873 **
## amt             1   0.69097  11.1795      2     10 0.002819 **
## pri             1   0.34649   2.6509      2     10 0.119200   
## diap            1   0.32381   2.3944      2     10 0.141361   
## qrs             1   0.29184   2.0606      2     10 0.178092   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(man.plas, "Wilks")

## 
## Type II MANOVA Tests:
## 
## Sum of squares and products for error:
##          plasma      ami
## plasma 870008.3 765676.5
## ami    765676.5 940708.9
## 
## ------------------------------------------
##  
## Term: factor(gender) 
## 
## Sum of squares and products for the hypothesis:
##         plasma     ami
## plasma 1374824 1552624
## ami    1552624 1753418
## 
## Multivariate Test: factor(gender)
##                Df test stat approx F num Df den Df    Pr(>F)   
## factor(gender)  1 0.3447916 9.501514      2     10 0.0048729 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------
##  
## Term: amt 
## 
## Sum of squares and products for the hypothesis:
##         plasma     ami
## plasma 1729764 1860459
## ami    1860459 2001028
## 
## Multivariate Test: amt
##     Df test stat approx F num Df den Df    Pr(>F)   
## amt  1 0.3090326 11.17952      2     10 0.0028185 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## ------------------------------------------
##  
## Term: pri 
## 
## Sum of squares and products for the hypothesis:
##          plasma      ami
## plasma 460972.6 399226.1
## ami    399226.1 345750.4
## 
## Multivariate Test: pri
##     Df test stat approx F num Df den Df Pr(>F)
## pri  1 0.6535143 2.650942      2     10 0.1192
## 
## ------------------------------------------
##  
## Term: diap 
## 
## Sum of squares and products for the hypothesis:
##          plasma      ami
## plasma 399802.7 397287.9
## ami    397287.9 394788.8
## 
## Multivariate Test: diap
##      Df test stat approx F num Df den Df  Pr(>F)
## diap  1 0.6761857 2.394418      2     10 0.14136
## 
## ------------------------------------------
##  
## Term: qrs 
## 
## Sum of squares and products for the hypothesis:
##          plasma      ami
## plasma 308240.7 202311.6
## ami    202311.6 132785.8
## 
## Multivariate Test: qrs
##     Df test stat approx F num Df den Df  Pr(>F)
## qrs  1  0.708156 2.060591      2     10 0.17809

plot(fit.plasmi$residuals)

d. Construct a 95% prediction ellipse for both Total TCAD and Amount of amitriptyline present in TCAD for z1 = 1, z2 = 1200, z3 = 140, z4 = 70, and z5 = 85.

pred.plasmi <- data.frame(gender = 1, amt = 1200, pri = 140, diap = 70, qrs = 85)
point <- as.data.frame(predict(fit.plasmi, newdata = pred.plasmi, interval = 'prediction'))
center <- c(point[1,1], point[1,2])

Z <- model.matrix(fit.plasmi)
resp <- fit.plasmi$model[[1]]
n <- nrow(resp); m <- ncol(resp); r <- ncol(Z) - 1
S <- crossprod(fit.plasmi$residuals)/(n - r - 1)

t2 <- terms(fit.plasmi)
term <- delete.response(t2)
mframe <- model.frame(term, pred.plasmi, na.action = na.pass, xlev = fit.plasmi$xlevels)
z0 <- model.matrix(term, mframe, contrasts.arg = fit.plasmi$contrasts)
radius <- sqrt((m*(n - r - 1)/(n - r - m))*qf(0.95, m, n - r - m)*z0%*%solve(t(Z)%*%Z) %*% t(z0))

lipsy <-as.data.frame(ellipse(center = c(center), shape = S, radius = c(radius), draw = FALSE))


ggplot(lipsy, aes(x, y)) + 
  geom_path() + 
  geom_point(aes(x = plasma, y = ami), data = point, alpha = 0.5) + 
  labs(x = "Total TCAD plasma level (plasma)", 
       y = "Amount of amitriptyline present in TCAD plasma level (ami)", 
       title = "95% Prediction Ellipse for Given Predictor Setting")

e. Compare this ellipse with the prediction intervals in Parts (a) and (b). Comment.

ggplot(lipsy, aes(x, y)) + 
  geom_hline(aes(y = PI.95.plas[[2]]), 
             yintercept = PI.95.plas[[2]], 
             color = "blue", 
             linetype = "dashed") + 
  geom_hline(aes(y = PI.95.plas[[3]]), 
             yintercept = PI.95.plas[[3]], 
             color = "blue", 
             linetype = "dashed") + 
  geom_vline(aes(x = PI.95.ami[[2]]), 
             xintercept = PI.95.ami[[2]], 
             color = "red", 
             linetype = "dashed") + 
  geom_vline(aes(x = PI.95.ami[[3]]), 
             xintercept = PI.95.ami[[3]], 
             color = "red", 
             linetype = "dashed") + 
  geom_path() + 
  geom_point(aes(x = plasma, y = ami), 
             data = point, 
             alpha = 0.5) + 
  labs(x = "Total TCAD plasma level (plasma)", 
       y = "Amount of amitriptyline present in TCAD plasma level (ami)", 
       title = "95% Prediction Ellipse for Given Predictor Setting",
       subtitle = "Red/Blue Dashed Lines Represent Prediction Interval Limits from Parts (a) & (b)")