SOAL 1 Overdoses of the drug amitriptyline (ami_data.csv) A researcher wants to know the relationship between gender (male = 0, female = 1), amount of drug taken at time of overdose (AMT), PR wave measurement (PR), diastolic blood pressure (DIAP), QRS wave measurement (QRS) toward total TCAD plasma level (TOT) and the amount of amitriptyline present in the TCAD plasma level (AMI) simultaneously. Use a significance level α=0.05.
# Read file
ami_data <- read.csv("D:/UNY/MySta/SEM 5/StatMul/dataset StatMul/ami_data.csv")
head(ami_data)
## TOT AMI GEN AMT PR DIAP QRS
## 1 3389 3149 1 7500 220 0 140
## 2 1101 653 1 1975 200 0 100
## 3 1131 810 0 3600 205 60 111
## 4 596 448 1 675 160 60 120
## 5 896 844 1 750 185 70 83
## 6 1767 1450 1 2500 180 60 80
str(ami_data)
## 'data.frame': 17 obs. of 7 variables:
## $ TOT : int 3389 1101 1131 596 896 1767 807 1111 645 628 ...
## $ AMI : int 3149 653 810 448 844 1450 493 941 547 392 ...
## $ GEN : int 1 1 0 1 1 1 1 0 1 1 ...
## $ AMT : int 7500 1975 3600 675 750 2500 350 1500 375 1050 ...
## $ PR : int 220 200 205 160 185 180 154 200 137 167 ...
## $ DIAP: int 0 0 60 60 70 60 80 70 60 60 ...
## $ QRS : int 140 100 111 120 83 80 98 93 105 74 ...
summary(ami_data)
## TOT AMI GEN AMT
## Min. : 500 Min. : 384.0 Min. :0.0000 Min. : 350
## 1st Qu.: 652 1st Qu.: 458.0 1st Qu.:0.0000 1st Qu.: 750
## Median : 896 Median : 653.0 Median :1.0000 Median :1750
## Mean :1120 Mean : 882.4 Mean :0.7059 Mean :2146
## 3rd Qu.:1131 3rd Qu.: 941.0 3rd Qu.:1.0000 3rd Qu.:3000
## Max. :3389 Max. :3149.0 Max. :1.0000 Max. :7500
## PR DIAP QRS
## Min. :135.0 Min. : 0 Min. : 60.00
## 1st Qu.:160.0 1st Qu.:60 1st Qu.: 80.00
## Median :180.0 Median :60 Median : 98.00
## Mean :174.9 Mean :52 Mean : 97.18
## 3rd Qu.:185.0 3rd Qu.:70 3rd Qu.:111.00
## Max. :220.0 Max. :90 Max. :140.00
ami_data <- ami_data[, sapply(ami_data, is.numeric)]
str(ami_data)
## 'data.frame': 17 obs. of 7 variables:
## $ TOT : int 3389 1101 1131 596 896 1767 807 1111 645 628 ...
## $ AMI : int 3149 653 810 448 844 1450 493 941 547 392 ...
## $ GEN : int 1 1 0 1 1 1 1 0 1 1 ...
## $ AMT : int 7500 1975 3600 675 750 2500 350 1500 375 1050 ...
## $ PR : int 220 200 205 160 185 180 154 200 137 167 ...
## $ DIAP: int 0 0 60 60 70 60 80 70 60 60 ...
## $ QRS : int 140 100 111 120 83 80 98 93 105 74 ...
sum(is.na(ami_data))
## [1] 0
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
# Buat matrix plot dengan scatter, hist, dan koef korelasi
pairs.panels(ami_data,
method = "pearson",
hist.col = "#00AFBB",
density = TRUE,
ellipses = FALSE,
lm = TRUE)
library(GGally)
## Warning: package 'GGally' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
ggpairs(ami_data, lower = list(continuous="smooth"))
# Matrix X (n x p) with intercept
X <- as.matrix(cbind(Intercept = 1,
GEN = as.numeric(ami_data$GEN),
AMT = as.numeric(ami_data$AMT),
PR = as.numeric(ami_data$PR),
DIAP = as.numeric(ami_data$DIAP),
QRS = as.numeric(ami_data$QRS)))
X
## Intercept GEN AMT PR DIAP QRS
## [1,] 1 1 7500 220 0 140
## [2,] 1 1 1975 200 0 100
## [3,] 1 0 3600 205 60 111
## [4,] 1 1 675 160 60 120
## [5,] 1 1 750 185 70 83
## [6,] 1 1 2500 180 60 80
## [7,] 1 1 350 154 80 98
## [8,] 1 0 1500 200 70 93
## [9,] 1 1 375 137 60 105
## [10,] 1 1 1050 167 60 74
## [11,] 1 1 3000 180 60 80
## [12,] 1 1 450 160 64 60
## [13,] 1 1 1750 135 90 79
## [14,] 1 0 2000 160 60 80
## [15,] 1 0 4500 180 0 100
## [16,] 1 0 1500 170 90 120
## [17,] 1 1 3000 180 0 129
# Matrix Y (n x m) with 2 dependen variable: TOT dan AMI
Y <- as.matrix(cbind(TOT = as.numeric(ami_data$TOT), AMI = as.numeric(ami_data$AMI)))
Y
## TOT AMI
## [1,] 3389 3149
## [2,] 1101 653
## [3,] 1131 810
## [4,] 596 448
## [5,] 896 844
## [6,] 1767 1450
## [7,] 807 493
## [8,] 1111 941
## [9,] 645 547
## [10,] 628 392
## [11,] 1360 1283
## [12,] 652 458
## [13,] 860 722
## [14,] 500 384
## [15,] 781 501
## [16,] 1070 405
## [17,] 1754 1520
n <- nrow(X); p <- ncol(X); m <- ncol(Y)
df <- n - p
# Hitungan OLS multivariate
XtX <- t(X) %*% X
XtY <- t(X) %*% Y
XtX_inv <- solve(XtX)
beta.hat <- XtX_inv %*% XtY # p x m
yhat <- X %*% beta.hat # n x m
residuals <- Y - yhat # n x m
residual.ss <- t(residuals) %*% residuals # m x m
total_residual_ss_trace <- sum(diag(residual.ss))
# Estimasi kovarian error dan residual standard errors
Sigma.hat <- residual.ss / df
resid_se <- sqrt(diag(Sigma.hat))
options(max.print = 1e6)
cat("Beta hat (rows = terms; cols = TOT, AMI):\n")
## Beta hat (rows = terms; cols = TOT, AMI):
print(as.data.frame(round(beta.hat, 6)), row.names = TRUE)
## TOT AMI
## Intercept -2879.478246 -2728.708544
## GEN 675.650781 763.029762
## AMT 0.284851 0.306373
## PR 10.272133 8.896198
## DIAP 7.251171 7.205560
## QRS 7.598240 4.987051
cat("\nFitted values (Yhat):\n")
##
## Fitted values (Yhat):
print(as.data.frame(round(yhat, 4)), row.names = TRUE)
## TOT AMI
## 1 3256.1783 2987.4723
## 2 1173.0039 917.3533
## 3 1530.2477 1183.8524
## 4 978.8473 695.2946
## 5 1048.3913 828.2122
## 6 1400.2136 1232.8679
## 7 802.5001 576.7421
## 8 816.4432 478.2760
## 9 543.1593 323.9642
## 10 808.0523 643.0536
## 11 1542.6391 1386.0546
## 12 487.8661 355.9597
## 13 934.2662 813.9387
## 14 376.6946 138.7275
## 15 1011.1594 749.9924
## 16 858.4551 490.1516
## 17 1479.8825 1198.0865
cat("\nResiduals:\n")
##
## Residuals:
print(as.data.frame(round(residuals, 4)), row.names = TRUE)
## TOT AMI
## 1 132.8217 161.5277
## 2 -72.0039 -264.3533
## 3 -399.2477 -373.8524
## 4 -382.8473 -247.2946
## 5 -152.3913 15.7878
## 6 366.7864 217.1321
## 7 4.4999 -83.7421
## 8 294.5568 462.7240
## 9 101.8407 223.0358
## 10 -180.0523 -251.0536
## 11 -182.6391 -103.0546
## 12 164.1339 102.0403
## 13 -74.2662 -91.9387
## 14 123.3054 245.2725
## 15 -230.1594 -248.9924
## 16 211.5449 -85.1516
## 17 274.1175 321.9135
cat("\nResidual SSE matrix (t(E)E):\n")
##
## Residual SSE matrix (t(E)E):
print(round(residual.ss, 4))
## TOT AMI
## TOT 870008.3 765676.5
## AMI 765676.5 940708.9
cat("\nTrace of residual.ss (sum of diag) =", round(total_residual_ss_trace, 4), "\n")
##
## Trace of residual.ss (sum of diag) = 1810717
cat("Degrees of freedom (n-p) =", df, "\n")
## Degrees of freedom (n-p) = 11
cat("\nEstimated Sigma.hat = residual.ss / (n-p):\n")
##
## Estimated Sigma.hat = residual.ss / (n-p):
print(round(Sigma.hat, 4))
## TOT AMI
## TOT 79091.66 69606.95
## AMI 69606.95 85518.99
cat("\nResidual standard errors (per response):\n")
##
## Residual standard errors (per response):
resid_se_named <- setNames(round(resid_se, 4), colnames(Y))
print(resid_se_named)
## TOT AMI
## 281.2324 292.4363
# Rata-rata kolom Y
ybar <- colMeans(Y)
n <- nrow(Y); m <- ncol(Y)
Ybar <- matrix(ybar, n, m, byrow = TRUE)
# Fit regresi multivariat
fit <- lm(cbind(TOT, AMI) ~ GEN + AMT + PR + DIAP + QRS, data = ami_data)
# SSCP matrices
SSCP.Total <- crossprod(Y - Ybar)
SSCP.Reg <- crossprod(fitted(fit) - Ybar)
SSCP.Error <- crossprod(Y - fitted(fit))
cat("SSCP Total:\n"); print(round(SSCP.Total, 2))
## SSCP Total:
## TOT AMI
## TOT 7705940 7474767
## AMI 7474767 7610378
cat("\nSSCP Regression:\n"); print(round(SSCP.Reg, 2))
##
## SSCP Regression:
## TOT AMI
## TOT 6835932 6709091
## AMI 6709091 6669669
cat("\nSSCP Error:\n"); print(round(SSCP.Error, 2))
##
## SSCP Error:
## TOT AMI
## TOT 870008.3 765676.5
## AMI 765676.5 940708.9
# Verifikasi dekomposisi
cat("\nCheck Total ≈ Regression + Error:\n")
##
## Check Total ≈ Regression + Error:
print(round(SSCP.Reg + SSCP.Error, 2))
## TOT AMI
## TOT 7705940 7474767
## AMI 7474767 7610378
library(MVN)
##
## Attaching package: 'MVN'
## The following object is masked from 'package:psych':
##
## mardia
library(nortest)
library(psych)
# Uji Henze-Zirkler
res_df <- as.data.frame(residuals(fit))
result <- mvn(data = res_df, mvn_test = "hz")
summary(result, select = "mvn")
##
## ── Multivariate Normality Test Results ─────────────────────────────────────────
## Test Statistic p.value Method MVN
## 1 Henze-Zirkler 0.242 0.843 asymptotic ✓ Normal
# Uji normalitas univariat (Anderson–Darling)
apply(res_df, 2, ad.test)
## $TOT
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 0.2583, p-value = 0.6721
##
##
## $AMI
##
## Anderson-Darling normality test
##
## data: newX[, i]
## A = 0.38525, p-value = 0.3524
# Statistik deskriptif
res_desc <- describe(res_df)
print(res_desc, digits = 15)
## vars n mean sd median trimmed mad min max
## TOT 1 17 0e+00 233.1856 4.499942 2.164083 273.6172 -399.2477 366.7864
## AMI 2 17 3e-15 242.4754 -83.742102 -5.924771 267.7742 -373.8524 462.7240
## range skew kurtosis se
## TOT 766.0341 -0.1555823 -1.266963 56.55581
## AMI 836.5764 0.2212360 -1.265142 58.80892