TUGAS 2 ANALISIS REGRESI

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
X0 <- rep(1,36)
data_liver <- read.csv("C:/Users/lenovo/OneDrive/ドキュメント/anreg/Minggu 4/data liver.csv")
data_liver$X0 <- rep(1,36)
data_liver2 <- data_liver %>% select("Y", "X0", "X1", "X2", "X4")

Eksplorasi Data

Y <- data_liver$Y
X1 <- data_liver$X1
X2 <- data_liver$X2
X4 <- data_liver$X4

par(mfrow=c(1,3))
plot(X1, Y, pch=19)
plot(X2, Y, pch=19)
plot(X4, Y, pch=19)

Pembentukan model (Perhitungan matriks manual)

Parameter Regresi

y <- as.matrix(Y)
x <- as.matrix(cbind(X0,X1,X2,X4))
b <- solve(t(x)%*%x)%*%t(x)%*%y;round(b,4)
##        [,1]
## X0 122.1006
## X1   0.9242
## X2   0.6180
## X4   3.9523
b0 <- b[1]
b1 <- b[2]
b2 <- b[3]
b3 <- b[4]

print(paste("b0:",b[1]))
## [1] "b0: 122.1006349907"
print(paste("b1:",b[2]))
## [1] "b1: 0.924234526233408"
print(paste("b2:",b[3]))
## [1] "b2: 0.617950334052231"
print(paste("b3:",b[4]))
## [1] "b3: 3.95233664011293"

Koefisien determinasi dan penyesuaiannya

n <- nrow(data_liver2)
p <- ncol(data_liver2) - 1
sigma_kuadrat <- (t(y)%*%y-t(b)%*%t(x)%*%y)/(n-p)
Res_se <- sqrt(sigma_kuadrat)
round(Res_se,3)
##        [,1]
## [1,] 26.966

nilai koefisien regresi menunjukkan bahwa, secara rata-rata, prediksi model menyimpang sekitar 26.966 unit dari nilai aktual \(y\).

df <- n-p
df
## [1] 32
y_duga <- b0+b1*X1+b2*X2+b3*X4
Y <- data.frame(y,y_duga);head(Y)
##        y   y_duga
## 1 158.76 166.5139
## 2 197.19 207.5768
## 3 144.73 179.0114
## 4 140.06 173.5212
## 5 129.71 148.4538
## 6 162.59 180.6730
R_squared <- (cor(y,y_duga))^2;round(R_squared,4)
##       [,1]
## [1,] 0.386

Nilai \(R^2\) sebesar 0.386 menunjukkan bahwa sekitar 38.6% dari variasi dalam \(y\) dapat dijelaskan oleh model regresi. Ini menunjukkan bahwa model memiliki kecocokan yang tidak terlalu baik dengan data.

R_squared_adj <- 1-((1-R_squared)*(n-1)/(n-p));round(R_squared_adj,4)
##        [,1]
## [1,] 0.3284

Nilai adjusted \(R^2\) sebesar 0.3284 menunjukkan bahwa meskipun ada penalti untuk jumlah variabel, model masih mampu menjelaskan 32.84% dari variasi dalam \(y\).

Uji F dan Std.Error parameter regresi

galat <- y-(b0+b1*X1+b2*X2+b3*X4)
KTReg <- sum((y_duga-mean(y))^2)/(p-1) ;print(paste("KTR:",KTReg))
## [1] "KTR: 4876.19884277776"
KTG <- sum(galat^2)/(n-p) ;print(paste("KTG",KTG))
## [1] "KTG 727.147978975695"
Fhit <- KTReg/KTG;print(paste("Fhit:",round(Fhit,0)))
## [1] "Fhit: 7"
dbreg <- p-1;print(paste("dbr:",dbreg)) 
## [1] "dbr: 3"
dbg <- n-p;print(paste("dbg:",dbg))
## [1] "dbg: 32"

P-value

pf(Fhit, dbreg, dbg, lower.tail = F)
## [1] 0.001223205

Standard error koefisien regresi

se_b <- sqrt(sigma_kuadrat[1]*solve(t(x)%*%x))
se_b
##           X0        X1        X2       X4
## X0 11.998293       NaN 1.1829188      NaN
## X1       NaN 0.8126577       NaN      NaN
## X2  1.182919       NaN 0.9055069      NaN
## X4       NaN       NaN       NaN 1.811898
se_b0 <- se_b[1,1];print(paste("se_b0:",round(se_b0,4)))
## [1] "se_b0: 11.9983"
se_b1 <- se_b[2,2];print(paste("se_b1:",round(se_b1,4)))
## [1] "se_b1: 0.8127"
se_b2 <- se_b[3,3];print(paste("se_b2:",round(se_b2,4)))
## [1] "se_b2: 0.9055"
se_b3 <- se_b[4,4];print(paste("se_b3:",round(se_b3,4)))
## [1] "se_b3: 1.8119"

Signifikansi Parameter

t-value

t_b0 <- b0/se_b0;print(paste("t_b0:",round(t_b0,2)))
## [1] "t_b0: 10.18"
t_b1 <- b1/se_b1;print(paste("t_b1:",round(t_b1,2)))
## [1] "t_b1: 1.14"
t_b2 <- b2/se_b2;print(paste("t_b2:",round(t_b2,2)))
## [1] "t_b2: 0.68"
t_b3 <- b3/se_b3;print(paste("t_b3:",round(t_b3,2)))
## [1] "t_b3: 2.18"

p-value

print(paste("p_b0:",2*pt(-abs(t_b0),df <- n-p)))
## [1] "p_b0: 1.47506344832897e-11"
print(paste("p_b1:",2*pt(-abs(t_b1),df <- n-p)))
## [1] "p_b1: 0.263858037514289"
print(paste("p_b2:",2*pt(-abs(t_b2),df <- n-p)))
## [1] "p_b2: 0.499874045483506"
print(paste("p_b3:",2*pt(-abs(t_b3),df <- n-p)))
## [1] "p_b3: 0.0366193614765748"

Selang Kepercayaan (1-alfa)*100%

t <- qt(.975, df <- n-p)

BB_b0 <- b0-t*se_b0
BA_b0 <- b0+t*se_b0

BB_b1 <- b1-t*se_b1
BA_b1 <- b1+t*se_b1

BB_b2 <- b2-t*se_b2
BA_b2 <- b2+t*se_b2

BB_b3 <- b3-t*se_b3
BA_b3 <- b3+t*se_b3

Batas.Bawah <- as.matrix(c(round(BB_b0,6),round(BB_b1,6),round(BB_b2,6),round(BB_b3,6)))
Batas.Atas <- as.matrix(c(round(BA_b0,6),round(BA_b1,6),round(BA_b2,6),round(BA_b3,6)))
Fit <- (c(round(b0,6),round(b1,6),round(b2,6),round(b3,6)))

Selang.Kepercayaan <- cbind(Batas.Bawah, Fit, Batas.Atas)
colnames(Selang.Kepercayaan ) <- c("Batas bawah Selang (2.5%)", "Fit", "Batas atas Selang (97.5%)")
rownames(Selang.Kepercayaan ) <- c("Intersep", "b1", "b2", "b3")
Selang.Kepercayaan
##          Batas bawah Selang (2.5%)        Fit Batas atas Selang (97.5%)
## Intersep                 97.660912 122.100635                146.540358
## b1                       -0.731095   0.924235                  2.579564
## b2                       -1.226507   0.617950                  2.462407
## b3                        0.261620   3.952337                  7.643053

Pembentukan model (Fungsi lm())

reg <- lm(y~X1+X2+X4, data=data_liver2)
summary(reg)
## 
## Call:
## lm(formula = y ~ X1 + X2 + X4, data = data_liver2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -45.799 -16.805  -1.352   9.696  51.355 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 122.1006    11.9983  10.177 1.48e-11 ***
## X1            0.9242     0.8127   1.137   0.2639    
## X2            0.6180     0.9055   0.682   0.4999    
## X4            3.9523     1.8119   2.181   0.0366 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.97 on 32 degrees of freedom
## Multiple R-squared:  0.386,  Adjusted R-squared:  0.3284 
## F-statistic: 6.706 on 3 and 32 DF,  p-value: 0.001223
anova(reg)
## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq Mean Sq F value   Pr(>F)   
## X1         1  8501.9  8501.9 11.6922 0.001731 **
## X2         1  2666.8  2666.8  3.6674 0.064461 . 
## X4         1  3459.9  3459.9  4.7582 0.036619 * 
## Residuals 32 23268.7   727.1                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(reg)
##                  2.5 %     97.5 %
## (Intercept) 97.6609117 146.540358
## X1          -0.7310951   2.579564
## X2          -1.2265068   2.462407
## X4           0.2616205   7.643053

Perbandingan manual (matriks) dan fungsi lm()

koef <- as.matrix(reg$coefficients)
penduga <- cbind(b, koef)
colnames(penduga) <- c('matriks', 'fungsi lm')
rownames(penduga) <- c("intersep", "b1", "b2", "b3")
penduga
##              matriks   fungsi lm
## intersep 122.1006350 122.1006350
## b1         0.9242345   0.9242345
## b2         0.6179503   0.6179503
## b3         3.9523366   3.9523366

Berdasar perbandingan ini diperoleh hasil yang sama yang artinya penggunaan matriks secara manual sudah tepat dalam memodelkan regresi linear berganda bergitu pula sebaliknya.