# Membuat dataset numerik berdasarkan tabel yang diberikan
data <- data.frame(
Age = c(30.83, 58.67, 24.50, 27.83),
Debt = c(0.000, 4.460, 0.500, 1.540),
YearsEmployed = c(1.25, 3.04, 1.50, 3.75),
Income = c(0, 560, 824, 3)
)
# Menampilkan dataset
knitr::kable(data)
| Age | Debt | YearsEmployed | Income |
|---|---|---|---|
| 30.83 | 0.00 | 1.25 | 0 |
| 58.67 | 4.46 | 3.04 | 560 |
| 24.50 | 0.50 | 1.50 | 824 |
| 27.83 | 1.54 | 3.75 | 3 |
Jika dataset sudah ditambahkan, maka kita akan lanjut ke tahap berikutnya, yaitu menghitung Variance-Covariance Matrix, Eigen Value dan Eigen Vector, serta Correlation Matrix.
# Menghitung Variance-Covariance Matrix
cov_matrix <- cov(data)
# Menampilkan hasil
print("Variance-Covariance Matrix:")
## [1] "Variance-Covariance Matrix:"
cov_matrix
## Age Debt YearsEmployed Income
## Age 246.15983 28.767550 6.580750 1315.7125
## Debt 28.76755 3.983567 1.526967 220.1150
## YearsEmployed 6.58075 1.526967 1.454567 -119.4483
## Income 1315.71250 220.115000 -119.448333 170547.5833
# Menghitung Eigen Value dan Eigen Vector dari covariance matrix
eigen_values <- eigen(cov_matrix)$values
eigen_vectors <- eigen(cov_matrix)$vectors
# Menampilkan hasil
print("Eigen Values:")
## [1] "Eigen Values:"
eigen_values
## [1] 1.705581e+05 2.393532e+02 1.712288e+00 -1.632702e-11
print("Eigen Vectors:")
## [1] "Eigen Vectors:"
eigen_vectors
## [,1] [,2] [,3] [,4]
## [1,] -0.0077252614 0.992899630 0.0928018443 0.0740166350
## [2,] -0.0012918442 0.114272856 -0.5786760906 -0.8075110078
## [3,] 0.0007000127 0.032113304 -0.8102601508 0.5851894853
## [4,] -0.9999690803 -0.007795793 -0.0005365682 0.0008810479
cor_matrix <- cor(data)
# Menampilkan hasil
print("Correlation Matrix:")
## [1] "Correlation Matrix:"
cor_matrix
## Age Debt YearsEmployed Income
## Age 1.0000000 0.9186673 0.3477763 0.2030625
## Debt 0.9186673 1.0000000 0.6343467 0.2670489
## YearsEmployed 0.3477763 0.6343467 1.0000000 -0.2398228
## Income 0.2030625 0.2670489 -0.2398228 1.0000000
Matriks kovarians menunjukkan bagaimana dua variabel berubah bersama-sama.
Jika nilai kovarians positif → Ketika satu variabel naik, variabel lain juga cenderung naik.
Jika nilai kovarians negatif → Ketika satu variabel naik, variabel lain cenderung turun.
Jika nilai mendekati nol → Tidak ada hubungan yang kuat antara kedua variabel.
Eigen Decomposition adalah teknik dalam aljabar linear yang digunakan untuk mendekomposisi matrix persegi menjadi eigen values (nilai eigen) dan eigen vectors (vektor eigen). Teknik ini sering digunakan dalam Principal Component Analysis (PCA) untuk menemukan pola utama dalam data.
Korelasi mengukur seberapa kuat hubungan linear antara dua variabel, dengan nilai berkisar antara -1 hingga 1: +1 → Korelasi sempurna positif (saat satu naik, yang lain naik). 0 → Tidak ada hubungan. -1 → Korelasi sempurna negatif (saat satu naik, yang lain turun).
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
path = "E:\\data-assignment2.csv"
mf<-read_csv(path, show_col_types = FALSE)
# Create data-frames
X<-mf %>% dplyr::select( `Sistolik (X1)`, `Diastolik (X2)`) %>%
scale()
Y<-mf %>% dplyr::select(`Tinggi (Y1)`, `Berat (Y2)`) %>%
scale()
cc <- cancor(X,Y)
str(cc)
## List of 5
## $ cor : num [1:2] 0.721 0.195
## $ xcoef : num [1:2, 1:2] 0.214 -0.595 0.692 -0.412
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "Sistolik (X1)" "Diastolik (X2)"
## .. ..$ : NULL
## $ ycoef : num [1:2, 1:2] 0.228 -0.648 -1.123 0.945
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "Tinggi (Y1)" "Berat (Y2)"
## .. ..$ : NULL
## $ xcenter: Named num [1:2] 3.98e-16 1.15e-15
## ..- attr(*, "names")= chr [1:2] "Sistolik (X1)" "Diastolik (X2)"
## $ ycenter: Named num [1:2] -1.11e-15 -4.28e-16
## ..- attr(*, "names")= chr [1:2] "Tinggi (Y1)" "Berat (Y2)"
print(cc)
## $cor
## [1] 0.7206701 0.1953755
##
## $xcoef
## [,1] [,2]
## Sistolik (X1) 0.2135035 0.6918022
## Diastolik (X2) -0.5952280 -0.4121620
##
## $ycoef
## [,1] [,2]
## Tinggi (Y1) 0.2281666 -1.1233476
## Berat (Y2) -0.6483502 0.9453105
##
## $xcenter
## Sistolik (X1) Diastolik (X2)
## 3.978299e-16 1.147230e-15
##
## $ycenter
## Tinggi (Y1) Berat (Y2)
## -1.110223e-15 -4.278985e-16
cc$cor
## [1] 0.7206701 0.1953755
CC1_X <- as.matrix(X) %*% cc$xcoef[, 1]
CC1_Y <- as.matrix(Y) %*% cc$ycoef[, 1]
CC2_X <- as.matrix(X) %*% cc$xcoef[, 2]
CC2_Y <- as.matrix(Y) %*% cc$ycoef[, 2]
cca_df <- mf %>%
mutate(CC1_X=CC1_X,
CC1_Y=CC1_Y,
CC2_X=CC2_X,
CC2_Y=CC2_Y) %>%
glimpse()
## Rows: 6
## Columns: 8
## $ `Sistolik (X1)` <dbl> 120, 109, 130, 121, 135, 140
## $ `Diastolik (X2)` <dbl> 76, 80, 82, 78, 85, 87
## $ `Tinggi (Y1)` <dbl> 165, 180, 170, 185, 180, 187
## $ `Berat (Y2)` <dbl> 60, 80, 70, 85, 90, 87
## $ CC1_X <dbl[,1]> <matrix[6 x 1]>
## $ CC1_Y <dbl[,1]> <matrix[6 x 1]>
## $ CC2_X <dbl[,1]> <matrix[6 x 1]>
## $ CC2_Y <dbl[,1]> <matrix[6 x 1]>
#| fig.width: 5.5
#| fig.height: 4
cca_df %>%
ggplot(aes(x=CC1_X,y=CC1_Y, color=`Tinggi (Y1)`))+
geom_point()
# First Canonical Variate of X vs Latent Variable
p1<-cca_df %>%
ggplot(aes(x=`Tinggi (Y1)`,y=CC1_X, color=`Tinggi (Y1)`))+
geom_boxplot(width=0.5)+
geom_jitter(width=0.15)+
theme(legend.position="none")+
ggtitle("First Canonical Variate of X vs Tinggi")
# First Canonical Variate of Y vs Latent Variable
p2<-cca_df %>%
ggplot(aes(x=`Tinggi (Y1)`,y=CC1_Y, color=`Tinggi (Y1)`))+
geom_boxplot(width=0.5)+
geom_jitter(width=0.15)+
theme(legend.position="none")+
ggtitle("First Canonical Variate of Y vs Tinggi")
library(patchwork)
p1+p2
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?