Problem 1

skull <- read.csv("T6-13.dat.txt", sep = "", header=FALSE)
colnames(skull) <-c("MaxBreath",
"BasHeight", "BasLength", "NasHeight", "TunePeriod")
head(skull)

##   MaxBreath BasHeight BasLength NasHeight TunePeriod
## 1       131       138        89        49          1
## 2       125       131        92        48          1
## 3       131       132        99        50          1
## 4       119       132        96        44          1
## 5       136       143       100        54          1
## 6       138       137        89        56          1

Part A.

The MANOVA test is used to compare mean responses of several treatements or means of several populations.

Hypothesis: \(H_0\) : \(\mu_1 = \mu_2 = \dots = \mu_n\)

\(H_A\) : At least one \(\mu \neq 0\)

Decision Rule: If F Test Statistic > critical value, we reject the \(H_0\)

Conclusion: With F test statistic = 2.049 > 1.9939, and a p-value of 0.04358 < 0.05 = \(\alpha\), we reject our \(H_0\) and conclude that the time effect differences exist.

There is a difference of male Egyptian skulls for three different time periods.

test1 <- manova(cbind(skull[,1],skull[,2],skull[,3],skull[,4])~factor(skull[,5]))
summary(test1,'Wilks') #c("Pillai", "Wilks", "Hotelling-Lawley", "Roy")

##                    Df  Wilks approx F num Df den Df  Pr(>F)  
## factor(skull[, 5])  2 0.8301   2.0491      8    168 0.04358 *
## Residuals          87                                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary.aov(test1)

##  Response 1 :
##                    Df Sum Sq Mean Sq F value  Pr(>F)  
## factor(skull[, 5])  2  150.2  75.100  3.6595 0.02979 *
## Residuals          87 1785.4  20.522                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response 2 :
##                    Df Sum Sq Mean Sq F value Pr(>F)
## factor(skull[, 5])  2   20.6  10.300  0.4657 0.6293
## Residuals          87 1924.3  22.118               
## 
##  Response 3 :
##                    Df  Sum Sq Mean Sq F value  Pr(>F)  
## factor(skull[, 5])  2  190.29  95.144  3.8447 0.02512 *
## Residuals          87 2153.00  24.747                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response 4 :
##                    Df Sum Sq Mean Sq F value Pr(>F)
## factor(skull[, 5])  2   2.02  1.0111  0.1047 0.9007
## Residuals          87 840.20  9.6575

#Critical Value F with df 2p and 2*(n-p-2)
#This is the Distribution of Wilks' Lambda because p=4, g=3 (notes page 22)
crit_value <- qf(0.95, 8, 168)
crit_value

## [1] 1.993884

Part B.

With an \(\alpha = 0.05\) all the Bonferroni simultaneous confidence intervals include 0 which suggests that there is no significant difference between the 3 time periods. This has a different conclusion than our MANOVA test above.

So we checked the normality assumptions to verify if the usual MANOVA assumptions are realistic for this data. It seems like the usual MANOVA assumptions are not realistic for the data based on the QQ Plots for the 3 different time period as the normality assumption is violated.

# pair comparison
g <- 3
p <- 4

#All Tune Periods are 30
n1 <- length(which((skull$TunePeriod==1)))
n2 <- length(which((skull$TunePeriod==2)))
n3 <- length(which((skull$TunePeriod==3)))
n <- n1+n2+n3

#Finding means across the columns and we don't want Tune Period so we remove it 
xbar1 <- colMeans(skull[skull$TunePeriod==1, -5])
xbar2 <- colMeans(skull[skull$TunePeriod==2,-5])
xbar3 <- colMeans(skull[skull$TunePeriod==3,-5])
xbar <- (n1*xbar1+n2*xbar2+n3*xbar3)/(n1+n2+n3)

#Finding SE
S1 <- cov(skull[skull$TunePeriod==1,-5])
S2 <- cov(skull[skull$TunePeriod==2,-5])
S3 <- cov(skull[skull$TunePeriod==3,-5])
W <- (n1-1)*S1+(n2-1)*S2+(n3-1)*S3

alpha <- 0.05
crit.b <-  qt(1-alpha/(p*g*(g-1)),df=n-g)

for ( i in 1:p ){
LCI12 <- (xbar1[i]-xbar2[i])-crit.b*sqrt(W[i,i]/(n-g)*(1/n1+1/n2))
UCI12 <- (xbar1[i]-xbar2[i])+crit.b*sqrt(W[i,i]/(n-g)*(1/n1+1/n2))
cat("mu1[",i,"]-mu2[",i,"] = (",LCI12,",",UCI12,")\n",sep="")

# \mu_{11}-\mu_{31}
LCI13 <- (xbar1[i]-xbar3[i])-crit.b*sqrt(W[i,i]/(n-g)*(1/n1+1/n3))
UCI13 <- (xbar1[i]-xbar3[i])+crit.b*sqrt(W[i,i]/(n-g)*(1/n1+1/n3))
cat("mu1[",i,"]-mu3[",i,"] = (",LCI13,",",UCI13,")\n",sep="")

# \mu_{21}-\mu_{31}
LCI23 <- (xbar2[i]-xbar3[i])-crit.b*sqrt(W[i,i]/(n-g)*(1/n2+1/n3))
UCI23 <- (xbar2[i]-xbar3[i])+crit.b*sqrt(W[i,i]/(n-g)*(1/n2+1/n3))
cat("mu2[",i,"]-mu3[",i,"] = (",LCI23,",",UCI23,")\n",sep="")
}

## mu1[1]-mu2[1] = (-4.442312,2.442312)
## mu1[1]-mu3[1] = (-6.542312,0.3423115)
## mu2[1]-mu3[1] = (-5.542312,1.342312)
## mu1[2]-mu2[2] = (-2.673706,4.473706)
## mu1[2]-mu3[2] = (-3.773706,3.373706)
## mu2[2]-mu3[2] = (-4.673706,2.473706)
## mu1[3]-mu2[3] = (-3.68011,3.88011)
## mu1[3]-mu3[3] = (-0.6467765,6.913443)
## mu2[3]-mu3[3] = (-0.7467765,6.813443)
## mu1[4]-mu2[4] = (-2.061423,2.661423)
## mu1[4]-mu3[4] = (-2.394756,2.328089)
## mu2[4]-mu3[4] = (-2.694756,2.028089)

#Intervals 1-3 are the 95% confidence intervals for Max Breath during the 3 different time periods

#Intervals 4-6 are the 95% confidence intervals for Bas Height during the 3 different time periods

#Intervals 7-9 are the 95% confidence intervals for Bas Length during the 3 different time periods

#Intervals 10-12 are the 95% confidence intervals for Nas Height during the 3 different time periods

#Normality 
#Time Period 1
S1inv <- solve(S1)
skull1 <- skull[skull$TunePeriod==1,-5]
datachisq <- diag(t(t(skull1)-xbar1) %*% S1inv %*% (t(skull1)-xbar1))
qqplot(qchisq(ppoints(500),df=p),datachisq, main="",
xlab="Theoretical Quantiles",ylab="Sample Quantiles")

#Time Period 2
S2inv <- solve(S2)
skull2 <- skull[skull$TunePeriod==2,-5]
datachisq <- diag(t(t(skull2)-xbar2) %*% S2inv %*% (t(skull2)-xbar2))
qqplot(qchisq(ppoints(500),df=p),datachisq,main="",
xlab="Theoretical Quantiles",ylab="Sample Quantiles")

#Time Period 3
S3inv <- solve(S3)
skull3 <- skull[skull$TunePeriod==3,-5]
datachisq <- diag(t(t(skull3)-xbar3) %*% S3inv %*% (t(skull3)-xbar3))
qqplot(qchisq(ppoints(500),df=p),datachisq,main="",
xlab="Theoretical Quantiles",ylab="Sample Quantiles")

Problem 2.

Part A.

Hypothesis: \(H_0\) : There is no effect of species or time (or interaction)

\(H_A\) : There is some effect of species or time (or interaction)

Decision Rule: With a p-value < \(\alpha\) = 0.05, we reject the \(H_0\).

Conclusion: From the Wilkes test, all p-values for species, time, and the interaction term are less than \(\alpha\) = 0.05.

Thus, we conclude that species, time, and the interaction of species and time have an affect on percent spectral reflectance at differing wavelengths.

jul <- read.csv("T6-18.dat.txt", sep = "", header=FALSE)
colnames(jul) <-c("five",
"seven", "Species", "Time", "Replication")
jul <- jul[,-5]

test2<-manova(cbind(jul$five,jul$seven)~factor(jul$Time)*jul$Species)
summary(test2,"Wilks") #c("Pillai", "Wilks", "Hotelling-Lawley", "Roy")

##                              Df    Wilks approx F num Df den Df    Pr(>F)
## factor(jul$Time)              2 0.049166   45.629      4     52 < 2.2e-16
## jul$Species                   2 0.068774   36.571      4     52 1.554e-14
## factor(jul$Time):jul$Species  4 0.087070   15.528      8     52 2.217e-11
## Residuals                    27                                          
##                                 
## factor(jul$Time)             ***
## jul$Species                  ***
## factor(jul$Time):jul$Species ***
## Residuals                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(test2,"Pillai") #c("Pillai", "Wilks", "Hotelling-Lawley", "Roy")

##                              Df  Pillai approx F num Df den Df    Pr(>F)
## factor(jul$Time)              2 0.99199  13.2853      4     54 1.330e-07
## jul$Species                   2 0.96120  12.4915      4     54 2.910e-07
## factor(jul$Time):jul$Species  4 0.92116   5.7634      8     54 2.606e-05
## Residuals                    27                                         
##                                 
## factor(jul$Time)             ***
## jul$Species                  ***
## factor(jul$Time):jul$Species ***
## Residuals                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(test2,"Hotelling-Lawley") #c("Pillai", "Wilks", "Hotelling-Lawley", "Roy")

##                              Df Hotelling-Lawley approx F num Df den Df
## factor(jul$Time)              2           18.502  115.639      4     50
## jul$Species                   2           13.105   81.904      4     50
## factor(jul$Time):jul$Species  4           10.390   32.470      8     50
## Residuals                    27                                        
##                                 Pr(>F)    
## factor(jul$Time)             < 2.2e-16 ***
## jul$Species                  < 2.2e-16 ***
## factor(jul$Time):jul$Species < 2.2e-16 ***
## Residuals                                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(test2,"Roy") #c("Pillai", "Wilks", "Hotelling-Lawley", "Roy")

##                              Df    Roy approx F num Df den Df    Pr(>F)
## factor(jul$Time)              2 18.457  249.168      2     27 < 2.2e-16
## jul$Species                   2 13.071  176.462      2     27 3.144e-16
## factor(jul$Time):jul$Species  4 10.381   70.074      4     27 7.339e-14
## Residuals                    27                                        
##                                 
## factor(jul$Time)             ***
## jul$Species                  ***
## factor(jul$Time):jul$Species ***
## Residuals                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary.aov(test2)

##  Response 1 :
##                              Df  Sum Sq Mean Sq F value    Pr(>F)    
## factor(jul$Time)              2 1275.25  637.62 224.578 < 2.2e-16 ***
## jul$Species                   2  965.18  482.59 169.973 5.027e-16 ***
## factor(jul$Time):jul$Species  4  795.81  198.95  70.073 7.341e-14 ***
## Residuals                    27   76.66    2.84                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response 2 :
##                              Df Sum Sq Mean Sq F value    Pr(>F)    
## factor(jul$Time)              2 5573.8 2786.90 42.5207 4.537e-09 ***
## jul$Species                   2 2026.9 1013.43 15.4622 3.348e-05 ***
## factor(jul$Time):jul$Species  4  193.5   48.39  0.7383    0.5741    
## Residuals                    27 1769.6   65.54                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Part B.

From the ANOVA tables below, the interaction term is significant (p-value < \(\alpha\) = 0.05) for the 560nm wavelength and non significant for the 720nm.

Thus, we fail to reject the \(H_0\) for 720 nm, and conclude that the interaction of species and time does not have a significant effect on the 720 nm wavelength. Additionally, we reject the \(H_0\) for the 560 nm and conclude that the interaction of species and time does have a significant effect.

#Two Way ANOVA for 560nm
anova(lm(jul$five~jul$Species*factor(jul$Time)))

## Analysis of Variance Table
## 
## Response: jul$five
##                              Df  Sum Sq Mean Sq F value    Pr(>F)    
## jul$Species                   2  965.18  482.59 169.973 5.027e-16 ***
## factor(jul$Time)              2 1275.25  637.62 224.578 < 2.2e-16 ***
## jul$Species:factor(jul$Time)  4  795.81  198.95  70.073 7.341e-14 ***
## Residuals                    27   76.66    2.84                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Two Way ANOVA for 720nm
anova(lm(jul$seven~jul$Species*factor(jul$Time)))

## Analysis of Variance Table
## 
## Response: jul$seven
##                              Df Sum Sq Mean Sq F value    Pr(>F)    
## jul$Species                   2 2026.9 1013.43 15.4622 3.348e-05 ***
## factor(jul$Time)              2 5573.8 2786.90 42.5207 4.537e-09 ***
## jul$Species:factor(jul$Time)  4  193.5   48.39  0.7383    0.5741    
## Residuals                    27 1769.6   65.54                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Problem 3

health <- read.csv("T7-6.dat.txt", sep = "", header=FALSE)
colnames(health) <-c("TOT",
"AMI", "GEN", "AMT", "PRI","DIAP","QRS")
head(health)

##    TOT  AMI GEN  AMT PRI DIAP QRS
## 1 3389 3149   1 7500 220    0 140
## 2 1101  653   1 1975 200    0 100
## 3 1131  810   0 3600 205   60 111
## 4  596  448   1  675 160   60 120
## 5  896  844   1  750 185   70  83
## 6 1767 1450   1 2500 180   60  80

Part A.

The regression equation is

Y1 TOT = -2879 + 676 Z1 GEN + 0.285 Z2 AMT + 10.3 Z3 PRI + 7.25 Z4 DIAP + 7.60 Z5 QRS

The residual plots indicate small departures from normality. Thus, we conclude that the model has been fitted well.

The 95% confidence interval for TOT is (41.3 1417.7).

#Fitting Linear Model
hmod<- lm(TOT ~ factor(GEN)+AMT+PRI+DIAP+QRS , data = health)
summary(hmod)

## 
## Call:
## lm(formula = TOT ~ factor(GEN) + AMT + PRI + DIAP + QRS, data = health)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -399.2 -180.1    4.5  164.1  366.8 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.879e+03  8.933e+02  -3.224 0.008108 ** 
## factor(GEN)1  6.757e+02  1.621e+02   4.169 0.001565 ** 
## AMT           2.848e-01  6.091e-02   4.677 0.000675 ***
## PRI           1.027e+01  4.255e+00   2.414 0.034358 *  
## DIAP          7.251e+00  3.225e+00   2.248 0.046026 *  
## QRS           7.598e+00  3.849e+00   1.974 0.074006 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 281.2 on 11 degrees of freedom
## Multiple R-squared:  0.8871, Adjusted R-squared:  0.8358 
## F-statistic: 17.29 on 5 and 11 DF,  p-value: 6.983e-05

anova(hmod)

## Analysis of Variance Table
## 
## Response: TOT
##             Df  Sum Sq Mean Sq F value   Pr(>F)    
## factor(GEN)  1  288658  288658  3.6497  0.08248 .  
## AMT          1 5616926 5616926 71.0179 3.97e-06 ***
## PRI          1  341134  341134  4.3131  0.06204 .  
## DIAP         1  280973  280973  3.5525  0.08613 .  
## QRS          1  308241  308241  3.8973  0.07401 .  
## Residuals   11  870008   79092                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Residual Plots
plot(hmod)

hist(hmod$residuals)

#95% Prediction Interval
new.dat <- data.frame(GEN = 1, AMT = 1200, PRI = 140, DIAP = 70, QRS = 85)
predict(hmod, newdata = new.dat, interval = 'prediction')

##        fit      lwr      upr
## 1 729.5248 41.34785 1417.702

Part B.

The regression equation is

Y2 AMI = -2729 + 763 Z1 GEN + 0.306 Z2 AMT + 8.90 Z3 PRI + 7.21 Z4 DIAP + 4.99 Z5 QRS

The residual plots indicate small departures from normality. Thus, we conclude that the model has been fitted well.

The 95% confidence interval for AMI is (-139.8674, 1291.318).

#Fitting Linear Model
hmod2 <- lm(AMI ~ factor(GEN)+AMT+PRI+DIAP+QRS , data = health)
summary(hmod2)

## 
## Call:
## lm(formula = AMI ~ factor(GEN) + AMT + PRI + DIAP + QRS, data = health)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -373.85 -247.29  -83.74  217.13  462.72 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.729e+03  9.288e+02  -2.938 0.013502 *  
## factor(GEN)1  7.630e+02  1.685e+02   4.528 0.000861 ***
## AMT           3.064e-01  6.334e-02   4.837 0.000521 ***
## PRI           8.896e+00  4.424e+00   2.011 0.069515 .  
## DIAP          7.206e+00  3.354e+00   2.149 0.054782 .  
## QRS           4.987e+00  4.002e+00   1.246 0.238622    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 292.4 on 11 degrees of freedom
## Multiple R-squared:  0.8764, Adjusted R-squared:  0.8202 
## F-statistic:  15.6 on 5 and 11 DF,  p-value: 0.0001132

anova(hmod2)

## Analysis of Variance Table
## 
## Response: AMI
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## factor(GEN)  1  532382  532382  6.2253   0.02977 *  
## AMT          1 5457338 5457338 63.8143 6.623e-06 ***
## PRI          1  227012  227012  2.6545   0.13153    
## DIAP         1  320151  320151  3.7436   0.07913 .  
## QRS          1  132786  132786  1.5527   0.23862    
## Residuals   11  940709   85519                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Residual Plots
plot(hmod2)

hist(hmod2$residuals)

#95% Prediction Interval
new.dat <- data.frame(GEN = 1, AMT = 1200, PRI = 140, DIAP = 70, QRS = 85)
predict(hmod2, newdata = new.dat, interval = 'prediction')

##        fit       lwr      upr
## 1 575.7255 -139.8674 1291.318

Part C.

Hypothesis: \(H_0\) : There is no effect of predictors.

\(H_A\) : There is some effect of at least one of the predictors.

Decision Rule: With a p-value < \(\alpha\) = 0.05, we reject the \(H_0\).

Conclusion: From the Wilkes test, the p-value of AMT is less than \(\alpha\) = 0.05. Thus, AMT is significant and we reject the null hypotehesis for Z2.

#Fitting Multivariate Lin Model
hmod3 <- lm(cbind(TOT, AMI) ~ factor(GEN)+AMT+PRI+DIAP+QRS , data = health)
summary(hmod3)

## Response TOT :
## 
## Call:
## lm(formula = TOT ~ factor(GEN) + AMT + PRI + DIAP + QRS, data = health)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -399.2 -180.1    4.5  164.1  366.8 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.879e+03  8.933e+02  -3.224 0.008108 ** 
## factor(GEN)1  6.757e+02  1.621e+02   4.169 0.001565 ** 
## AMT           2.848e-01  6.091e-02   4.677 0.000675 ***
## PRI           1.027e+01  4.255e+00   2.414 0.034358 *  
## DIAP          7.251e+00  3.225e+00   2.248 0.046026 *  
## QRS           7.598e+00  3.849e+00   1.974 0.074006 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 281.2 on 11 degrees of freedom
## Multiple R-squared:  0.8871, Adjusted R-squared:  0.8358 
## F-statistic: 17.29 on 5 and 11 DF,  p-value: 6.983e-05
## 
## 
## Response AMI :
## 
## Call:
## lm(formula = AMI ~ factor(GEN) + AMT + PRI + DIAP + QRS, data = health)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -373.85 -247.29  -83.74  217.13  462.72 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.729e+03  9.288e+02  -2.938 0.013502 *  
## factor(GEN)1  7.630e+02  1.685e+02   4.528 0.000861 ***
## AMT           3.064e-01  6.334e-02   4.837 0.000521 ***
## PRI           8.896e+00  4.424e+00   2.011 0.069515 .  
## DIAP          7.206e+00  3.354e+00   2.149 0.054782 .  
## QRS           4.987e+00  4.002e+00   1.246 0.238622    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 292.4 on 11 degrees of freedom
## Multiple R-squared:  0.8764, Adjusted R-squared:  0.8202 
## F-statistic:  15.6 on 5 and 11 DF,  p-value: 0.0001132

library(car)

## Loading required package: carData

Manova(hmod3)

## 
## Type II MANOVA Tests: Pillai test statistic
##             Df test stat approx F num Df den Df   Pr(>F)   
## factor(GEN)  1   0.65521   9.5015      2     10 0.004873 **
## AMT          1   0.69097  11.1795      2     10 0.002819 **
## PRI          1   0.34649   2.6509      2     10 0.119200   
## DIAP         1   0.32381   2.3944      2     10 0.141361   
## QRS          1   0.29184   2.0606      2     10 0.178092   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(manova(hmod3), "Wilks")

##             Df   Wilks approx F num Df den Df    Pr(>F)    
## factor(GEN)  1 0.63334    2.895      2     10    0.1019    
## AMT          1 0.13002   33.455      2     10 3.716e-05 ***
## PRI          1 0.71562    1.987      2     10    0.1877    
## DIAP         1 0.73534    1.800      2     10    0.2150    
## QRS          1 0.70816    2.061      2     10    0.1781    
## Residuals   11                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

hmod3$residuals

##            TOT        AMI
## 1   132.821721  161.52769
## 2   -72.003916 -264.35329
## 3  -399.247694 -373.85244
## 4  -382.847295 -247.29456
## 5  -152.391292   15.78777
## 6   366.786445  217.13206
## 7     4.499942  -83.74210
## 8   294.556802  462.72401
## 9   101.840674  223.03576
## 10 -180.052350 -251.05364
## 11 -182.639086 -103.05464
## 12  164.133887  102.04026
## 13  -74.266182  -91.93874
## 14  123.305413  245.27247
## 15 -230.159411 -248.99241
## 16  211.544890  -85.15163
## 17  274.117452  321.91345

Part D.

Prediction Ellipse below.

new.dat <- data.frame(GEN = 1, AMT = 1200, PRI = 140, DIAP = 70, QRS = 85)

#Prediction
p <- predict(hmod3, newdata = new.dat, interval = 'prediction')

#Center
cent <- c(p[1,1],p[1,2])

#Shape
Z <- model.matrix(hmod3)
Y <- hmod3$model[[1]]
n <- nrow(Y)
m <- ncol(Y)
r <- ncol(Z) - 1
S <- crossprod(resid(hmod3))/(n-r-1)

#Radius
tt <- terms(hmod3)
Terms <- delete.response(tt)
mf <- model.frame(Terms, new.dat, na.action = na.pass, xlev = hmod3$xlevels)
z0 <- model.matrix(Terms, mf, contrasts.arg = hmod3$contrasts)
rad <- sqrt((m*(n-r-1)/(n-r-m))*qf(0.95,m,n-r-m)*z0%*%solve(t(Z)%*%Z) %*% t(z0))

#Points
library(car)
points <- ellipse(center = c(cent), shape = S, radius = c(rad), draw = FALSE)

#Plot
require(ggplot2, quietly = TRUE)
points_df <- as.data.frame(points)
ggplot(points_df, aes(x, y)) +
    geom_path() +
    geom_point(aes(x = TOT, y = AMI), data = data.frame(p)) +
    labs(x = "Total TCAD", y = "AMI", 
      title = "95% Confidence Interval for TCAD and AMI")

Part E.

The 95% prediction intervals in part a and b which were TOT = (41.3 1417.7) AMI = (-139.8674, 1291.318) respectively.

The predicted values for hmod3 shown in the ellipse using the multivariate model were (729.5248, 575.7255) where the coordinate pair follows (TOT, AMI).

The values fall within the prediction intervals so the conclusions agree.

STAT 488 Multivariate HW 4

Kajal Chokshi

3/21/2019

Problem 1

Part A.

Part B.

Problem 2.

Part A.

Part B.

Problem 3

Part A.

Part B.

Part C.

Part D.

Part E.