A1 D - Assignment 1 Statmul

SOAL 1 Overdoses of the drug amitriptyline (ami_data.csv) A researcher wants to know the relationship between gender (male = 0, female = 1), amount of drug taken at time of overdose (AMT), PR wave measurement (PR), diastolic blood pressure (DIAP), QRS wave measurement (QRS) toward total TCAD plasma level (TOT) and the amount of amitriptyline present in the TCAD plasma level (AMI) simultaneously. Use a significance level α=0.05.

What are the dependent and independent variables?

# Read file
ami_data <- read.csv("D:/UNY/MySta/SEM 5/StatMul/dataset StatMul/ami_data.csv")
head(ami_data)

##    TOT  AMI GEN  AMT  PR DIAP QRS
## 1 3389 3149   1 7500 220    0 140
## 2 1101  653   1 1975 200    0 100
## 3 1131  810   0 3600 205   60 111
## 4  596  448   1  675 160   60 120
## 5  896  844   1  750 185   70  83
## 6 1767 1450   1 2500 180   60  80

str(ami_data)

## 'data.frame':    17 obs. of  7 variables:
##  $ TOT : int  3389 1101 1131 596 896 1767 807 1111 645 628 ...
##  $ AMI : int  3149 653 810 448 844 1450 493 941 547 392 ...
##  $ GEN : int  1 1 0 1 1 1 1 0 1 1 ...
##  $ AMT : int  7500 1975 3600 675 750 2500 350 1500 375 1050 ...
##  $ PR  : int  220 200 205 160 185 180 154 200 137 167 ...
##  $ DIAP: int  0 0 60 60 70 60 80 70 60 60 ...
##  $ QRS : int  140 100 111 120 83 80 98 93 105 74 ...

summary(ami_data)

##       TOT            AMI              GEN              AMT      
##  Min.   : 500   Min.   : 384.0   Min.   :0.0000   Min.   : 350  
##  1st Qu.: 652   1st Qu.: 458.0   1st Qu.:0.0000   1st Qu.: 750  
##  Median : 896   Median : 653.0   Median :1.0000   Median :1750  
##  Mean   :1120   Mean   : 882.4   Mean   :0.7059   Mean   :2146  
##  3rd Qu.:1131   3rd Qu.: 941.0   3rd Qu.:1.0000   3rd Qu.:3000  
##  Max.   :3389   Max.   :3149.0   Max.   :1.0000   Max.   :7500  
##        PR             DIAP         QRS        
##  Min.   :135.0   Min.   : 0   Min.   : 60.00  
##  1st Qu.:160.0   1st Qu.:60   1st Qu.: 80.00  
##  Median :180.0   Median :60   Median : 98.00  
##  Mean   :174.9   Mean   :52   Mean   : 97.18  
##  3rd Qu.:185.0   3rd Qu.:70   3rd Qu.:111.00  
##  Max.   :220.0   Max.   :90   Max.   :140.00

Create a matrix plot for all variables and the correlation coefficients. Interpret them.

ami_data <- ami_data[, sapply(ami_data, is.numeric)]
str(ami_data)

## 'data.frame':    17 obs. of  7 variables:
##  $ TOT : int  3389 1101 1131 596 896 1767 807 1111 645 628 ...
##  $ AMI : int  3149 653 810 448 844 1450 493 941 547 392 ...
##  $ GEN : int  1 1 0 1 1 1 1 0 1 1 ...
##  $ AMT : int  7500 1975 3600 675 750 2500 350 1500 375 1050 ...
##  $ PR  : int  220 200 205 160 185 180 154 200 137 167 ...
##  $ DIAP: int  0 0 60 60 70 60 80 70 60 60 ...
##  $ QRS : int  140 100 111 120 83 80 98 93 105 74 ...

sum(is.na(ami_data))

## [1] 0

library(psych)

## Warning: package 'psych' was built under R version 4.4.3

# Buat matrix plot dengan scatter, hist, dan koef korelasi
pairs.panels(ami_data,
             method = "pearson",   
             hist.col = "#00AFBB",
             density = TRUE,       
             ellipses = FALSE,
             lm = TRUE)

library(GGally)

## Warning: package 'GGally' was built under R version 4.4.3

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.4.3

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

ggpairs(ami_data, lower = list(continuous="smooth"))

Determine the matrix quantities for Y and X.

# Matrix X (n x p) with intercept

X <- as.matrix(cbind(Intercept = 1,
                     GEN = as.numeric(ami_data$GEN),
                     AMT = as.numeric(ami_data$AMT),
                     PR  = as.numeric(ami_data$PR),
                     DIAP = as.numeric(ami_data$DIAP),
                     QRS = as.numeric(ami_data$QRS)))
X

##       Intercept GEN  AMT  PR DIAP QRS
##  [1,]         1   1 7500 220    0 140
##  [2,]         1   1 1975 200    0 100
##  [3,]         1   0 3600 205   60 111
##  [4,]         1   1  675 160   60 120
##  [5,]         1   1  750 185   70  83
##  [6,]         1   1 2500 180   60  80
##  [7,]         1   1  350 154   80  98
##  [8,]         1   0 1500 200   70  93
##  [9,]         1   1  375 137   60 105
## [10,]         1   1 1050 167   60  74
## [11,]         1   1 3000 180   60  80
## [12,]         1   1  450 160   64  60
## [13,]         1   1 1750 135   90  79
## [14,]         1   0 2000 160   60  80
## [15,]         1   0 4500 180    0 100
## [16,]         1   0 1500 170   90 120
## [17,]         1   1 3000 180    0 129

# Matrix Y (n x m) with 2 dependen variable: TOT dan AMI
Y <- as.matrix(cbind(TOT = as.numeric(ami_data$TOT), AMI = as.numeric(ami_data$AMI)))
Y

##        TOT  AMI
##  [1,] 3389 3149
##  [2,] 1101  653
##  [3,] 1131  810
##  [4,]  596  448
##  [5,]  896  844
##  [6,] 1767 1450
##  [7,]  807  493
##  [8,] 1111  941
##  [9,]  645  547
## [10,]  628  392
## [11,] 1360 1283
## [12,]  652  458
## [13,]  860  722
## [14,]  500  384
## [15,]  781  501
## [16,] 1070  405
## [17,] 1754 1520

Calculate the least square estimates β ̂, the fitted values Y ̂, the residuals ε ̂, and the residual sum of squares ε ̂’ε ̂.

n <- nrow(X); p <- ncol(X); m <- ncol(Y)
df <- n - p

# Hitungan OLS multivariate
XtX <- t(X) %*% X
XtY <- t(X) %*% Y
XtX_inv <- solve(XtX)
beta.hat <- XtX_inv %*% XtY      # p x m
yhat <- X %*% beta.hat           # n x m
residuals <- Y - yhat            # n x m
residual.ss <- t(residuals) %*% residuals   # m x m
total_residual_ss_trace <- sum(diag(residual.ss))

# Estimasi kovarian error dan residual standard errors
Sigma.hat <- residual.ss / df
resid_se <- sqrt(diag(Sigma.hat))

options(max.print = 1e6)


cat("Beta hat (rows = terms; cols = TOT, AMI):\n")

## Beta hat (rows = terms; cols = TOT, AMI):

print(as.data.frame(round(beta.hat, 6)), row.names = TRUE)

##                    TOT          AMI
## Intercept -2879.478246 -2728.708544
## GEN         675.650781   763.029762
## AMT           0.284851     0.306373
## PR           10.272133     8.896198
## DIAP          7.251171     7.205560
## QRS           7.598240     4.987051

cat("\nFitted values (Yhat):\n")

## 
## Fitted values (Yhat):

print(as.data.frame(round(yhat, 4)), row.names = TRUE)

##          TOT       AMI
## 1  3256.1783 2987.4723
## 2  1173.0039  917.3533
## 3  1530.2477 1183.8524
## 4   978.8473  695.2946
## 5  1048.3913  828.2122
## 6  1400.2136 1232.8679
## 7   802.5001  576.7421
## 8   816.4432  478.2760
## 9   543.1593  323.9642
## 10  808.0523  643.0536
## 11 1542.6391 1386.0546
## 12  487.8661  355.9597
## 13  934.2662  813.9387
## 14  376.6946  138.7275
## 15 1011.1594  749.9924
## 16  858.4551  490.1516
## 17 1479.8825 1198.0865

cat("\nResiduals:\n")

## 
## Residuals:

print(as.data.frame(round(residuals, 4)), row.names = TRUE)

##          TOT       AMI
## 1   132.8217  161.5277
## 2   -72.0039 -264.3533
## 3  -399.2477 -373.8524
## 4  -382.8473 -247.2946
## 5  -152.3913   15.7878
## 6   366.7864  217.1321
## 7     4.4999  -83.7421
## 8   294.5568  462.7240
## 9   101.8407  223.0358
## 10 -180.0523 -251.0536
## 11 -182.6391 -103.0546
## 12  164.1339  102.0403
## 13  -74.2662  -91.9387
## 14  123.3054  245.2725
## 15 -230.1594 -248.9924
## 16  211.5449  -85.1516
## 17  274.1175  321.9135

cat("\nResidual SSE matrix (t(E)E):\n")

## 
## Residual SSE matrix (t(E)E):

print(round(residual.ss, 4))

##          TOT      AMI
## TOT 870008.3 765676.5
## AMI 765676.5 940708.9

cat("\nTrace of residual.ss (sum of diag) =", round(total_residual_ss_trace, 4), "\n")

## 
## Trace of residual.ss (sum of diag) = 1810717

cat("Degrees of freedom (n-p) =", df, "\n")

## Degrees of freedom (n-p) = 11

cat("\nEstimated Sigma.hat = residual.ss / (n-p):\n")

## 
## Estimated Sigma.hat = residual.ss / (n-p):

print(round(Sigma.hat, 4))

##          TOT      AMI
## TOT 79091.66 69606.95
## AMI 69606.95 85518.99

cat("\nResidual standard errors (per response):\n")

## 
## Residual standard errors (per response):

resid_se_named <- setNames(round(resid_se, 4), colnames(Y))
print(resid_se_named)

##      TOT      AMI 
## 281.2324 292.4363

Determine the sum of squares and crossproducts of total, regression, and error.

# Rata-rata kolom Y
ybar <- colMeans(Y)
n <- nrow(Y); m <- ncol(Y)
Ybar <- matrix(ybar, n, m, byrow = TRUE)

# Fit regresi multivariat
fit <- lm(cbind(TOT, AMI) ~ GEN + AMT + PR + DIAP + QRS, data = ami_data)

# SSCP matrices
SSCP.Total <- crossprod(Y - Ybar)
SSCP.Reg   <- crossprod(fitted(fit) - Ybar)
SSCP.Error <- crossprod(Y - fitted(fit))

cat("SSCP Total:\n"); print(round(SSCP.Total, 2))

## SSCP Total:

##         TOT     AMI
## TOT 7705940 7474767
## AMI 7474767 7610378

cat("\nSSCP Regression:\n"); print(round(SSCP.Reg, 2))

## 
## SSCP Regression:

##         TOT     AMI
## TOT 6835932 6709091
## AMI 6709091 6669669

cat("\nSSCP Error:\n"); print(round(SSCP.Error, 2))

## 
## SSCP Error:

##          TOT      AMI
## TOT 870008.3 765676.5
## AMI 765676.5 940708.9

# Verifikasi dekomposisi
cat("\nCheck Total ≈ Regression + Error:\n")

## 
## Check Total ≈ Regression + Error:

print(round(SSCP.Reg + SSCP.Error, 2))

##         TOT     AMI
## TOT 7705940 7474767
## AMI 7474767 7610378

Check multivariate normality using a Henze-Zirkler test and give conclusion.

library(MVN)

## 
## Attaching package: 'MVN'

## The following object is masked from 'package:psych':
## 
##     mardia

library(nortest)
library(psych)

# Uji Henze-Zirkler
res_df <- as.data.frame(residuals(fit))
result <- mvn(data = res_df, mvn_test = "hz")
summary(result, select = "mvn")

##

## ── Multivariate Normality Test Results ─────────────────────────────────────────

##            Test Statistic p.value     Method      MVN
## 1 Henze-Zirkler     0.242   0.843 asymptotic ✓ Normal

#  Uji normalitas univariat (Anderson–Darling)
apply(res_df, 2, ad.test)

## $TOT
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 0.2583, p-value = 0.6721
## 
## 
## $AMI
## 
##  Anderson-Darling normality test
## 
## data:  newX[, i]
## A = 0.38525, p-value = 0.3524

# Statistik deskriptif
res_desc <- describe(res_df)
print(res_desc, digits = 15)

##     vars  n  mean       sd     median   trimmed      mad       min      max
## TOT    1 17 0e+00 233.1856   4.499942  2.164083 273.6172 -399.2477 366.7864
## AMI    2 17 3e-15 242.4754 -83.742102 -5.924771 267.7742 -373.8524 462.7240
##        range       skew  kurtosis       se
## TOT 766.0341 -0.1555823 -1.266963 56.55581
## AMI 836.5764  0.2212360 -1.265142 58.80892

A1 D - Assignment 1 Statmul

Maulana Diki

2025-09-08