kable(head(df1))
| breadth | height | length | nasal_length | period |
|---|---|---|---|---|
| 131 | 138 | 89 | 49 | 1 |
| 125 | 131 | 92 | 48 | 1 |
| 131 | 132 | 99 | 50 | 1 |
| 119 | 132 | 96 | 44 | 1 |
| 136 | 143 | 100 | 54 | 1 |
| 138 | 137 | 89 | 56 | 1 |
\(H_{0}: \mu_{1} = \mu_{2} = ... = \mu_{n} = 0\)
\(H_{a}:\) at least one \(\mu_{i} \neq 0\) for i = 1, 2, …, n
\(\alpha = 0.05\)
crit <- qf(0.95, df1 = 8, df2 = 168)
t1 <- manova(as.matrix(df1[,1:4])~factor(df1[,5]))
wilksum <- summary(t1, "Wilks"); wilksum
## Df Wilks approx F num Df den Df Pr(>F)
## factor(df1[, 5]) 2 0.8301 2.0491 8 168 0.04358 *
## Residuals 87
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aovsum <- summary.aov(t1); aovsum
## Response breadth :
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(df1[, 5]) 2 150.2 75.100 3.6595 0.02979 *
## Residuals 87 1785.4 20.522
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response height :
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(df1[, 5]) 2 20.6 10.300 0.4657 0.6293
## Residuals 87 1924.3 22.118
##
## Response length :
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(df1[, 5]) 2 190.29 95.144 3.8447 0.02512 *
## Residuals 87 2153.00 24.747
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response nasal_length :
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(df1[, 5]) 2 2.02 1.0111 0.1047 0.9007
## Residuals 87 840.20 9.6575
f <- unlist(wilksum[4])[5]
pval <- unlist(wilksum[4])[11]
Critical Value: F-Statistic > 1.9938839
Conclusion: With a p-value of 0.0435825, we reject the null with statistically significant evidence at the confidence level \(\alpha = 0.05\). That is, the one-way MANOVA results suggest that a time effect is at play.
alpha <- 0.05
n <- nrow(df1)
p1 <- df1 %>% filter(period == 1) %>% select(1:4)
n1 <- length(p1)
p2 <- df1 %>% filter(period == 2) %>% select(1:4)
n2 <- length(p2)
p3 <- df1 %>% filter(period == 3) %>% select(1:4)
n3 <- length(p3)
xbar1 <- colMeans(p1)
xbar2 <- colMeans(p2)
xbar3 <- colMeans(p3)
xbar <- (n1*xbar1 + n2*xbar2 + n3*xbar3)/(n)
s1 <- cov(p1); s1.inv <- solve(s1)
s2 <- cov(p2); s2.inv <- solve(s2)
s3 <- cov(p3); s3.inv <- solve(s3)
# Checking Normality for Time Period 1
chisq1 <- diag(t(t(p1) - xbar1) %*% s1.inv %*% (t(p1) - xbar1))
ggplot(p1) +
geom_qq(aes(sample = chisq1),
distribution = qchisq,
dparams = c(1 - alpha, 4)) +
geom_qq_line(aes(sample = chisq1),
distribution = qchisq,
dparams = c(1 - alpha, 4),
color = "red") +
labs(x = "Theoretical Quantiles",
y = "Sample Quantiles",
title = "QQ-Plot for Time Period 1")
# Checking Normality for Time Period 2
chisq2 <- diag(t(t(p2) - xbar2) %*% s2.inv %*% (t(p2) - xbar2))
ggplot(p2) +
geom_qq(aes(sample = chisq2),
distribution = qchisq,
dparams = c(1 - alpha, 4)) +
geom_qq_line(aes(sample = chisq2),
distribution = qchisq,
dparams = c(1 - alpha, 4),
color = "red") +
labs(x = "Theoretical Quantiles",
y = "Sample Quantiles",
title = "QQ-Plot for Time Period 2")
# Checking Normality for Time Period 3
chisq3 <- diag(t(t(p3) - xbar3) %*% s3.inv %*% (t(p3) - xbar3))
ggplot(p3) +
geom_qq(aes(sample = chisq3),
distribution = qchisq,
dparams = c(1 - alpha, 4)) +
geom_qq_line(aes(sample = chisq3),
distribution = qchisq,
dparams = c(1-alpha, 4),
color = "red") +
labs(x = "Theoretical Quantiles",
y = "Sample Quantiles",
title = "QQ-Plot for Time Period 3")
It seems as if the data violate the MANOVA assumption of normality for at least one of the three time periods according to the qq-plots. Based on this violation, the conclusions derived from MANOVA may not be valid for this dataset. Additionally, the qqplots show possible outliers at the far ends of the distributions.
kable(head(df2))
| green | infrared | species | season | rep |
|---|---|---|---|---|
| 9.33 | 19.14 | SS | 1 | 1 |
| 8.74 | 19.55 | SS | 1 | 2 |
| 9.31 | 19.24 | SS | 1 | 3 |
| 8.27 | 16.37 | SS | 1 | 4 |
| 10.22 | 25.00 | SS | 2 | 1 |
| 10.13 | 25.32 | SS | 2 | 2 |
\(H_{0}:\) Species, season & species*season interaction effects are nonexistent.
\(H_{a}:\) Some species, seasonal or interaction effect exists. \(\alpha = 0.05\)
t2 <- manova(as.matrix(df2[,1:2]) ~ cbind(factor(df2[,4]), df2[,3]))
wilksum <- summary(t2,"Wilks"); wilksum
## Df Wilks approx F num Df den Df
## cbind(factor(df2[, 4]), df2[, 3]) 2 0.30214 13.108 4 64
## Residuals 33
## Pr(>F)
## cbind(factor(df2[, 4]), df2[, 3]) 0.00000007433 ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hotsum <- summary(t2, "Hotelling-Lawley"); hotsum
## Df Hotelling-Lawley approx F num Df
## cbind(factor(df2[, 4]), df2[, 3]) 2 2.2412 17.369 4
## Residuals 33
## den Df Pr(>F)
## cbind(factor(df2[, 4]), df2[, 3]) 62 0.000000001318 ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pillsum <- summary(t2, "Pillai"); pillsum
## Df Pillai approx F num Df den Df
## cbind(factor(df2[, 4]), df2[, 3]) 2 0.71856 9.2522 4 66
## Residuals 33
## Pr(>F)
## cbind(factor(df2[, 4]), df2[, 3]) 0.00000536 ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pval.wilksum <- unlist(wilksum[4])[11]
pval.hotsum <- unlist(hotsum[4])[11]
pval.pillsum <- unlist(pillsum[4])[11]
Conclusion: With p-values of 0.0000001, 0, 0.0000054 for their resepctive Wilks, Hotelling-Lawley & Pillai tests, we reject the null hypotheses of each test with statistically significant evidence at the confidence level \(\alpha = 0.05\) for each test. That is, the two-factor MANOVA results suggest that a seasonal effect, a species effect, and an interaction effect between season & species are at play and affecting spectral reflectance.
fit <- lm(df2$green ~ df2$species*factor(df2$season))
anova(fit)
## Analysis of Variance Table
##
## Response: df2$green
## Df Sum Sq Mean Sq F value
## df2$species 2 965.18 482.59 169.973
## factor(df2$season) 2 1275.25 637.62 224.578
## df2$species:factor(df2$season) 4 795.81 198.95 70.073
## Residuals 27 76.66 2.84
## Pr(>F)
## df2$species 0.0000000000000005027 ***
## factor(df2$season) < 0.00000000000000022 ***
## df2$species:factor(df2$season) 0.0000000000000734140 ***
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit <- lm(df2$infrared ~ df2$species*factor(df2$season))
anova(fit)
## Analysis of Variance Table
##
## Response: df2$infrared
## Df Sum Sq Mean Sq F value Pr(>F)
## df2$species 2 2026.9 1013.43 15.4622 0.000033479587
## factor(df2$season) 2 5573.8 2786.90 42.5207 0.000000004537
## df2$species:factor(df2$season) 4 193.5 48.39 0.7383 0.5741
## Residuals 27 1769.6 65.54
##
## df2$species ***
## factor(df2$season) ***
## df2$species:factor(df2$season)
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Based on the results of the two factor ANOVAs, interaction between predictor variables occurs for green light but not for near infrared light.
there are also conjectured side effects that seem to be related to the use of the drug: irregular heartbeat, abnormal blood pressures, and irregular waves on the electrocardiogram among other things. The two response variables are
kable(head(df3))
| plasma | ami | gender | amt | pri | diap | qrs |
|---|---|---|---|---|---|---|
| 3389 | 3149 | 1 | 7500 | 220 | 0 | 140 |
| 1101 | 653 | 1 | 1975 | 200 | 0 | 100 |
| 1131 | 810 | 0 | 3600 | 205 | 60 | 111 |
| 596 | 448 | 1 | 675 | 160 | 60 | 120 |
| 896 | 844 | 1 | 750 | 185 | 70 | 83 |
| 1767 | 1450 | 1 | 2500 | 180 | 60 | 80 |
# Fitting Model
fit.plasma <- lm(plasma ~ factor(gender) + amt + pri + diap + qrs, data = df3)
fit.plasma; summary(fit.plasma)
##
## Call:
## lm(formula = plasma ~ factor(gender) + amt + pri + diap + qrs,
## data = df3)
##
## Coefficients:
## (Intercept) factor(gender)1 amt pri
## -2879.4782 675.6508 0.2849 10.2721
## diap qrs
## 7.2512 7.5982
##
## Call:
## lm(formula = plasma ~ factor(gender) + amt + pri + diap + qrs,
## data = df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -399.2 -180.1 4.5 164.1 366.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2879.47825 893.26033 -3.224 0.008108 **
## factor(gender)1 675.65078 162.05563 4.169 0.001565 **
## amt 0.28485 0.06091 4.677 0.000675 ***
## pri 10.27213 4.25489 2.414 0.034358 *
## diap 7.25117 3.22516 2.248 0.046026 *
## qrs 7.59824 3.84887 1.974 0.074006 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 281.2 on 11 degrees of freedom
## Multiple R-squared: 0.8871, Adjusted R-squared: 0.8358
## F-statistic: 17.29 on 5 and 11 DF, p-value: 0.00006983
c.plasma <- round(fit.plasma$coefficients, digits = 4)
# Evaluating Model
anova(fit.plasma)
## Analysis of Variance Table
##
## Response: plasma
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(gender) 1 288658 288658 3.6497 0.08248 .
## amt 1 5616926 5616926 71.0179 0.00000397 ***
## pri 1 341134 341134 4.3131 0.06204 .
## diap 1 280973 280973 3.5525 0.08613 .
## qrs 1 308241 308241 3.8973 0.07401 .
## Residuals 11 870008 79092
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
autoplot(fit.plasma, which = 1:6, ncol = 3, label.size = 3)
ggplot(fit.plasma, aes(x = fit.plasma$residuals)) + geom_histogram(bins = 5)
# Prediction Interval
pred.plasma <- data.frame(gender = 1, amt = 1200, pri = 140, diap = 70, qrs = 85)
PI.95.plas <- predict(fit.plasma, newdata = pred.plasma, interval = 'prediction')
dimnames(PI.95.plas)[[2]] <- list("Estimate", "Lower Limit", "Upper Limit")
kable(PI.95.plas)
| Estimate | Lower Limit | Upper Limit |
|---|---|---|
| 729.5248 | 41.34785 | 1417.702 |
Model: \(Y_{1} = -2879.4782 + 675.6508Male + 0.2849amt + 10.2721pri + 7.2512diap + 7.5982qrs\)
# Fitting Model
fit.ami <- lm(ami ~ factor(gender) + amt + pri + diap + qrs, data = df3)
fit.ami; summary(fit.ami)
##
## Call:
## lm(formula = ami ~ factor(gender) + amt + pri + diap + qrs, data = df3)
##
## Coefficients:
## (Intercept) factor(gender)1 amt pri
## -2728.7085 763.0298 0.3064 8.8962
## diap qrs
## 7.2056 4.9871
##
## Call:
## lm(formula = ami ~ factor(gender) + amt + pri + diap + qrs, data = df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -373.85 -247.29 -83.74 217.13 462.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2728.70854 928.84655 -2.938 0.013502 *
## factor(gender)1 763.02976 168.51169 4.528 0.000861 ***
## amt 0.30637 0.06334 4.837 0.000521 ***
## pri 8.89620 4.42440 2.011 0.069515 .
## diap 7.20556 3.35364 2.149 0.054782 .
## qrs 4.98705 4.00220 1.246 0.238622
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 292.4 on 11 degrees of freedom
## Multiple R-squared: 0.8764, Adjusted R-squared: 0.8202
## F-statistic: 15.6 on 5 and 11 DF, p-value: 0.0001132
c.ami <- fit.ami$coefficients
# Evaluating Model
anova(fit.ami)
## Analysis of Variance Table
##
## Response: ami
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(gender) 1 532382 532382 6.2253 0.02977 *
## amt 1 5457338 5457338 63.8143 0.000006623 ***
## pri 1 227012 227012 2.6545 0.13153
## diap 1 320151 320151 3.7436 0.07913 .
## qrs 1 132786 132786 1.5527 0.23862
## Residuals 11 940709 85519
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
autoplot(fit.ami, which = 1:6, ncol = 3, label.size = 3)
ggplot(fit.ami, aes(x = fit.ami$residuals)) + geom_histogram(bins = 5)
# Prediction Interval
pred.ami <- data.frame(gender = 1, amt = 1200, pri = 140, diap = 70, qrs = 85)
PI.95.ami <- predict(fit.ami, newdata = pred.ami, interval = 'prediction')
dimnames(PI.95.ami)[[2]] <- list("Estimate", "Lower Limit", "Upper Limit")
kable(PI.95.ami)
| Estimate | Lower Limit | Upper Limit |
|---|---|---|
| 575.7255 | -139.8674 | 1291.318 |
Model: \(Y_{2} = -2728.7085444 + 763.0297617Male + 0.3063734amt + 8.8961977pri + 7.2055597diap + 4.9870508qrs\)
fit.plasmi <- lm(as.matrix(df3[,1:2]) ~ factor(gender) + amt + pri + diap + qrs , data = df3)
plasmi.sum <- summary(fit.plasmi); plasmi.sum
## Response plasma :
##
## Call:
## lm(formula = plasma ~ factor(gender) + amt + pri + diap + qrs,
## data = df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -399.2 -180.1 4.5 164.1 366.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2879.47825 893.26033 -3.224 0.008108 **
## factor(gender)1 675.65078 162.05563 4.169 0.001565 **
## amt 0.28485 0.06091 4.677 0.000675 ***
## pri 10.27213 4.25489 2.414 0.034358 *
## diap 7.25117 3.22516 2.248 0.046026 *
## qrs 7.59824 3.84887 1.974 0.074006 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 281.2 on 11 degrees of freedom
## Multiple R-squared: 0.8871, Adjusted R-squared: 0.8358
## F-statistic: 17.29 on 5 and 11 DF, p-value: 0.00006983
##
##
## Response ami :
##
## Call:
## lm(formula = ami ~ factor(gender) + amt + pri + diap + qrs, data = df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -373.85 -247.29 -83.74 217.13 462.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2728.70854 928.84655 -2.938 0.013502 *
## factor(gender)1 763.02976 168.51169 4.528 0.000861 ***
## amt 0.30637 0.06334 4.837 0.000521 ***
## pri 8.89620 4.42440 2.011 0.069515 .
## diap 7.20556 3.35364 2.149 0.054782 .
## qrs 4.98705 4.00220 1.246 0.238622
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 292.4 on 11 degrees of freedom
## Multiple R-squared: 0.8764, Adjusted R-squared: 0.8202
## F-statistic: 15.6 on 5 and 11 DF, p-value: 0.0001132
man.plas <- Manova(fit.plasmi); man.plas
##
## Type II MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## factor(gender) 1 0.65521 9.5015 2 10 0.004873 **
## amt 1 0.69097 11.1795 2 10 0.002819 **
## pri 1 0.34649 2.6509 2 10 0.119200
## diap 1 0.32381 2.3944 2 10 0.141361
## qrs 1 0.29184 2.0606 2 10 0.178092
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(man.plas, "Wilks")
##
## Type II MANOVA Tests:
##
## Sum of squares and products for error:
## plasma ami
## plasma 870008.3 765676.5
## ami 765676.5 940708.9
##
## ------------------------------------------
##
## Term: factor(gender)
##
## Sum of squares and products for the hypothesis:
## plasma ami
## plasma 1374824 1552624
## ami 1552624 1753418
##
## Multivariate Test: factor(gender)
## Df test stat approx F num Df den Df Pr(>F)
## factor(gender) 1 0.3447916 9.501514 2 10 0.0048729 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: amt
##
## Sum of squares and products for the hypothesis:
## plasma ami
## plasma 1729764 1860459
## ami 1860459 2001028
##
## Multivariate Test: amt
## Df test stat approx F num Df den Df Pr(>F)
## amt 1 0.3090326 11.17952 2 10 0.0028185 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: pri
##
## Sum of squares and products for the hypothesis:
## plasma ami
## plasma 460972.6 399226.1
## ami 399226.1 345750.4
##
## Multivariate Test: pri
## Df test stat approx F num Df den Df Pr(>F)
## pri 1 0.6535143 2.650942 2 10 0.1192
##
## ------------------------------------------
##
## Term: diap
##
## Sum of squares and products for the hypothesis:
## plasma ami
## plasma 399802.7 397287.9
## ami 397287.9 394788.8
##
## Multivariate Test: diap
## Df test stat approx F num Df den Df Pr(>F)
## diap 1 0.6761857 2.394418 2 10 0.14136
##
## ------------------------------------------
##
## Term: qrs
##
## Sum of squares and products for the hypothesis:
## plasma ami
## plasma 308240.7 202311.6
## ami 202311.6 132785.8
##
## Multivariate Test: qrs
## Df test stat approx F num Df den Df Pr(>F)
## qrs 1 0.708156 2.060591 2 10 0.17809
plot(fit.plasmi$residuals)
pred.plasmi <- data.frame(gender = 1, amt = 1200, pri = 140, diap = 70, qrs = 85)
point <- as.data.frame(predict(fit.plasmi, newdata = pred.plasmi, interval = 'prediction'))
center <- c(point[1,1], point[1,2])
Z <- model.matrix(fit.plasmi)
resp <- fit.plasmi$model[[1]]
n <- nrow(resp); m <- ncol(resp); r <- ncol(Z) - 1
S <- crossprod(fit.plasmi$residuals)/(n - r - 1)
t2 <- terms(fit.plasmi)
term <- delete.response(t2)
mframe <- model.frame(term, pred.plasmi, na.action = na.pass, xlev = fit.plasmi$xlevels)
z0 <- model.matrix(term, mframe, contrasts.arg = fit.plasmi$contrasts)
radius <- sqrt((m*(n - r - 1)/(n - r - m))*qf(0.95, m, n - r - m)*z0%*%solve(t(Z)%*%Z) %*% t(z0))
lipsy <-as.data.frame(ellipse(center = c(center), shape = S, radius = c(radius), draw = FALSE))
ggplot(lipsy, aes(x, y)) +
geom_path() +
geom_point(aes(x = plasma, y = ami), data = point, alpha = 0.5) +
labs(x = "Total TCAD plasma level (plasma)",
y = "Amount of amitriptyline present in TCAD plasma level (ami)",
title = "95% Prediction Ellipse for Given Predictor Setting")
ggplot(lipsy, aes(x, y)) +
geom_hline(aes(y = PI.95.plas[[2]]),
yintercept = PI.95.plas[[2]],
color = "blue",
linetype = "dashed") +
geom_hline(aes(y = PI.95.plas[[3]]),
yintercept = PI.95.plas[[3]],
color = "blue",
linetype = "dashed") +
geom_vline(aes(x = PI.95.ami[[2]]),
xintercept = PI.95.ami[[2]],
color = "red",
linetype = "dashed") +
geom_vline(aes(x = PI.95.ami[[3]]),
xintercept = PI.95.ami[[3]],
color = "red",
linetype = "dashed") +
geom_path() +
geom_point(aes(x = plasma, y = ami),
data = point,
alpha = 0.5) +
labs(x = "Total TCAD plasma level (plasma)",
y = "Amount of amitriptyline present in TCAD plasma level (ami)",
title = "95% Prediction Ellipse for Given Predictor Setting",
subtitle = "Red/Blue Dashed Lines Represent Prediction Interval Limits from Parts (a) & (b)")