Tugas Praktikum Anreg 2

Mohon izin kak, semua syntax dibawah saya tulis sendiri namun untuk beberapa penjelasan hasilnya saya meng-copy dari contoh yang kakak kirim sebagai bahan materi

Syntax untuk membaca data “data_liver”

library(tidyverse, ggplot2)
## Warning: package 'tidyverse' was built under R version 4.4.2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tibble' was built under R version 4.4.2
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.2
## Warning: package 'dplyr' was built under R version 4.4.2
## Warning: package 'stringr' was built under R version 4.4.2
## Warning: package 'forcats' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data_liver = read.csv("C:/Users/bilal/Downloads/Fhar1st STA1231_Anreg_P2 main Minggu 4 (1)/data liver.csv")

head(data_liver)
##   No    X1    X2   X3    X4    X5   X6      Y
## 1  1 16.36  8.90 3.47  6.02 57.42 1.11 158.76
## 2  2 26.68 21.22 3.53 12.07 61.38 1.36 197.19
## 3  3 12.49 16.62 2.00  8.88 67.42 1.47 144.73
## 4  4  8.45 22.86 6.71  7.46 69.94 1.31 140.06
## 5  5 10.19 14.23 4.75  2.06 65.68 1.25 129.71
## 6  6 19.53 17.35 1.95  7.54 59.63 1.14 162.59

Pembentukan model (Perhitungan matriks manual)

Membentuk Parameter Regresi

y <- as.matrix(data_liver$Y)
x1 <- as.matrix(data_liver$X1)
x2 <- as.matrix(data_liver$X2)
x4 <- as.matrix(data_liver$X4)

X <- as.matrix(cbind(1, x1, x2, x4))

n <- nrow(X)
p <- ncol(X)

head(X)
##      [,1]  [,2]  [,3]  [,4]
## [1,]    1 16.36  8.90  6.02
## [2,]    1 26.68 21.22 12.07
## [3,]    1 12.49 16.62  8.88
## [4,]    1  8.45 22.86  7.46
## [5,]    1 10.19 14.23  2.06
## [6,]    1 19.53 17.35  7.54
head(y)
##        [,1]
## [1,] 158.76
## [2,] 197.19
## [3,] 144.73
## [4,] 140.06
## [5,] 129.71
## [6,] 162.59
print(n)
## [1] 36
print(p)
## [1] 4
b <- solve(t(X)%*%X)%*%t(X)%*%y;round(b,4)
##          [,1]
## [1,] 122.1006
## [2,]   0.9242
## [3,]   0.6180
## [4,]   3.9523
b0 <- b[1]
b1 <- b[2]
b2 <- b[3]
b4 <- b[4]

print(paste("b0:", b0))
## [1] "b0: 122.1006349907"
print(paste("b1:", b1))
## [1] "b1: 0.924234526233408"
print(paste("b2:", b2))
## [1] "b2: 0.617950334052231"
print(paste("b4:", b4))
## [1] "b4: 3.95233664011293"

Membentuk Koefisien determinasi dan penyesuaiannya

sigma_kuadrat <- (t(y)%*%y-t(b)%*%t(X)%*%y)/(n-p)
Res_se <- sqrt(sigma_kuadrat)
round(Res_se,3)
##        [,1]
## [1,] 26.966

nilai koefisien regresi menunjukkan bahwa, secara rata-rata, prediksi model menyimpang sekitar 26.966 unit dari nilai aktual \(y\).

df <- n-p
df
## [1] 32
y_duga <- b0+b1*x1+b2*x2+b4*x4
Y <- data.frame(y,y_duga);head(Y)
##        y   y_duga
## 1 158.76 166.5139
## 2 197.19 207.5768
## 3 144.73 179.0114
## 4 140.06 173.5212
## 5 129.71 148.4538
## 6 162.59 180.6730
R_squared <- (cor(y,y_duga))^2;round(R_squared,4)
##       [,1]
## [1,] 0.386

\(R^2\) mengukur proporsi variasi dalam variabel dependen \(y\) yang dapat dijelaskan oleh variabel independen dalam model. Nilai \(R^2\) sebesar 0.386 menunjukkan bahwa sekitar 38.6% dari variasi dalam \(y\) dapat dijelaskan oleh model regresi. Ini menunjukkan bahwa model memiliki kecocokan yang sangat baik dengan data.

R_squared_adj <- 1-((1-R_squared)*(n-1)/(n-p));round(R_squared_adj,4)
##        [,1]
## [1,] 0.3284

Adjusted \(R^2\) memperhitungkan jumlah variabel independen dalam model dan memberikan penalti untuk model yang terlalu kompleks. Nilai adjusted \(R^2\) sebesar 0.3284 menunjukkan bahwa meskipun ada penalti untuk jumlah variabel, model masih mampu menjelaskan 32.84% dari variasi dalam \(y\).

Uji F dan Std.Error parameter regresi

galat <- y-(b0+b1*x1+b2*x2+b4*x4)
KTReg <- sum((y_duga-mean(y))^2)/(p-1);print(paste("KTR:",KTReg))
## [1] "KTR: 4876.19884277776"
KTG <- sum(galat^2)/(n-p);print(paste("KTG",KTG))
## [1] "KTG 727.147978975695"
Fhit <- KTReg/KTG;print(paste("Fhit:",round(Fhit,0)))
## [1] "Fhit: 7"
dbreg <- p-1;print(paste("dbr:",dbreg)) 
## [1] "dbr: 3"
dbg <- n-p;print(paste("dbg:",dbg)) 
## [1] "dbg: 32"
pf(Fhit, dbreg, dbg, lower.tail = F)
## [1] 0.001223205
se_b <- sqrt(sigma_kuadrat[1]*solve(t(X)%*%X))
## Warning in sqrt(sigma_kuadrat[1] * solve(t(X) %*% X)): NaNs produced
se_b
##           [,1]      [,2]      [,3]     [,4]
## [1,] 11.998293       NaN 1.1829188      NaN
## [2,]       NaN 0.8126577       NaN      NaN
## [3,]  1.182919       NaN 0.9055069      NaN
## [4,]       NaN       NaN       NaN 1.811898
se_b0 <- se_b[1,1];print(paste("se_b0:",round(se_b0,4)))
## [1] "se_b0: 11.9983"
se_b1 <- se_b[2,2];print(paste("se_b1:",round(se_b1,4)))
## [1] "se_b1: 0.8127"
se_b2 <- se_b[3,3];print(paste("se_b2:",round(se_b2,4)))
## [1] "se_b2: 0.9055"
se_b4 <- se_b[4,4];print(paste("se_b4:",round(se_b4,4)))
## [1] "se_b4: 1.8119"

Signifikansi Parameter (nilai-t)

t_b0 <- b0/se_b0;print(paste("t_b0:",round(t_b0,2)))
## [1] "t_b0: 10.18"
t_b1 <- b1/se_b1;print(paste("t_b1:",round(t_b1,2)))
## [1] "t_b1: 1.14"
t_b2 <- b2/se_b2;print(paste("t_b2:",round(t_b2,2)))
## [1] "t_b2: 0.68"
t_b4 <- b4/se_b4;print(paste("t_b4:",round(t_b4,2)))
## [1] "t_b4: 2.18"
print(paste("p_b0:",2*pt(-abs(t_b0),df <- n-p)))
## [1] "p_b0: 1.47506344832897e-11"
print(paste("p_b1:",2*pt(-abs(t_b1),df <- n-p)))
## [1] "p_b1: 0.263858037514289"
print(paste("p_b2:",2*pt(-abs(t_b2),df <- n-p)))
## [1] "p_b2: 0.499874045483506"
print(paste("p_b4:",2*pt(-abs(t_b4),df <- n-p)))
## [1] "p_b4: 0.0366193614765748"

Selang Kepercayaan (1-alfa)*100%

t <- qt(.975, df <- n-p)

BB_b0 <- b0-t*se_b0
BA_b0 <- b0+t*se_b0

BB_b1 <- b1-t*se_b1
BA_b1 <- b1+t*se_b1

BB_b2 <- b2-t*se_b2
BA_b2 <- b2+t*se_b2

BB_b4 <- b4-t*se_b4
BA_b4 <- b4+t*se_b4

Batas.Bawah <- as.matrix(c(round(BB_b0,6),round(BB_b1,6),round(BB_b2,6), round(BB_b4,6)))
Batas.Atas <- as.matrix(c(round(BA_b0,6),round(BA_b1,6),round(BA_b2,6), round(BA_b4,6)))
Fit <- (c(round(b0,6),round(b1,6),round(b2,6), round(b4,6)))

Selang.Kepercayaan <- cbind(Batas.Bawah, Fit, Batas.Atas)
colnames(Selang.Kepercayaan ) <- c("Batas bawah Selang (2.5%)", "Fit", "Batas atas Selang (97.5%)")
rownames(Selang.Kepercayaan ) <- c("Intersep", "b1", "b2", "b4")
Selang.Kepercayaan
##          Batas bawah Selang (2.5%)        Fit Batas atas Selang (97.5%)
## Intersep                 97.660912 122.100635                146.540358
## b1                       -0.731095   0.924235                  2.579564
## b2                       -1.226507   0.617950                  2.462407
## b4                        0.261620   3.952337                  7.643053

Pembentukan model (Fungsi lm())

Dalam penggunaan fungsi lm(), kita akan memperoleh secara langsung nilai-nilai pada pemodelan regresi dari data yang kita miliki. Dengan menggunakan fungsi summary(), anova(), dan confint() dari model yang terbentuk maka dapat diperoleh nilai parameter, signifikansi, standar eror, koefisien determinasi, serta selang kepercayaan.

reg <- lm(y~x1+x2+x4, data=data_liver)
summary(reg)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x4, data = data_liver)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -45.799 -16.805  -1.352   9.696  51.355 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 122.1006    11.9983  10.177 1.48e-11 ***
## x1            0.9242     0.8127   1.137   0.2639    
## x2            0.6180     0.9055   0.682   0.4999    
## x4            3.9523     1.8119   2.181   0.0366 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.97 on 32 degrees of freedom
## Multiple R-squared:  0.386,  Adjusted R-squared:  0.3284 
## F-statistic: 6.706 on 3 and 32 DF,  p-value: 0.001223
confint(reg)
##                  2.5 %     97.5 %
## (Intercept) 97.6609117 146.540358
## x1          -0.7310951   2.579564
## x2          -1.2265068   2.462407
## x4           0.2616205   7.643053
anova(reg)
## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq Mean Sq F value   Pr(>F)   
## x1         1  8501.9  8501.9 11.6922 0.001731 **
## x2         1  2666.8  2666.8  3.6674 0.064461 . 
## x4         1  3459.9  3459.9  4.7582 0.036619 * 
## Residuals 32 23268.7   727.1                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Perbandingan manual (matriks) dan fungsi lm()

koef <- as.matrix(reg$coefficients)
penduga <- cbind(b, koef)
colnames(penduga) <- c('matriks', 'fungsi lm')
rownames(penduga) <- c("intersep", "b1", "b2", "b4")
penduga
##              matriks   fungsi lm
## intersep 122.1006350 122.1006350
## b1         0.9242345   0.9242345
## b2         0.6179503   0.6179503
## b4         3.9523366   3.9523366

Berdasar perbandingan ini diperoleh hasil yang sama yang artinya penggunaan matriks secara manual sudah tepat dalam memodelkan regresi linear berganda bergitu pula sebaliknya.