Input Data

data_liver <- read.csv("C:/Users/FAQIH/Downloads/data liver.csv")
head(data_liver)
##   No    X1    X2   X3    X4    X5   X6      Y
## 1  1 16.36  8.90 3.47  6.02 57.42 1.11 158.76
## 2  2 26.68 21.22 3.53 12.07 61.38 1.36 197.19
## 3  3 12.49 16.62 2.00  8.88 67.42 1.47 144.73
## 4  4  8.45 22.86 6.71  7.46 69.94 1.31 140.06
## 5  5 10.19 14.23 4.75  2.06 65.68 1.25 129.71
## 6  6 19.53 17.35 1.95  7.54 59.63 1.14 162.59
n <- 36
x0 <- rep(1,n) 
dt <- data.frame(data_liver$Y,x0,data_liver$X1,data_liver$X2,data_liver$X4)
head(dt)
##   data_liver.Y x0 data_liver.X1 data_liver.X2 data_liver.X4
## 1       158.76  1         16.36          8.90          6.02
## 2       197.19  1         26.68         21.22         12.07
## 3       144.73  1         12.49         16.62          8.88
## 4       140.06  1          8.45         22.86          7.46
## 5       129.71  1         10.19         14.23          2.06
## 6       162.59  1         19.53         17.35          7.54

Eksplorasi data

par(mfrow=c(1,2))
plot(data_liver$X1, data_liver$Y, pch=19)
plot(data_liver$X2, data_liver$Y, pch=19)

plot(data_liver$X4, data_liver$Y, pch=19)

Dari ketiga plot eksplorasi data diatas memberikan gambaran kecenderungan korelasi antar peubah y dengan tiap variabel x menunjukkan tren positif namun dengan keragaman yang besar.

Pembentukan Model (Perhitungan Matriks Manual)

y <- as.matrix(data_liver$Y)
X <- as.matrix(cbind(x0,data_liver$X1,data_liver$X2,data_liver$X4))
b <- solve(t(X)%*%X)%*%t(X)%*%y;round(b,4)
##        [,1]
## x0 122.1006
##      0.9242
##      0.6180
##      3.9523
b0 <- b[1]
b1 <- b[2]
b2 <- b[3]
b4 <- b[4]

print(paste("b0:",b[1]))
## [1] "b0: 122.1006349907"
print(paste("b1:",b[2]))
## [1] "b1: 0.924234526233408"
print(paste("b2:",b[3]))
## [1] "b2: 0.617950334052231"
print(paste("b4:",b[4]))
## [1] "b4: 3.95233664011293"

Koefisien determinasi dan penyesuaiannya

n<-36
p<-4
sigma_kuadrat <- (t(y)%*%y-t(b)%*%t(X)%*%y)/(n-p)
Res_se <- sqrt(sigma_kuadrat)
round(Res_se,3)
##        [,1]
## [1,] 26.966

Nilai koefisien regresi menunjukkan, secara rata - rata, prediksi model menyimpang sekitar 26.966 unit dari nilai aktual y.

df <- n-p
df
## [1] 32
y_duga <- b0+b1*data_liver$X1+b2*data_liver$X2+b4*data_liver$X4
Y <- data.frame(y,y_duga);head(Y)
##        y   y_duga
## 1 158.76 166.5139
## 2 197.19 207.5768
## 3 144.73 179.0114
## 4 140.06 173.5212
## 5 129.71 148.4538
## 6 162.59 180.6730
R_squared <- (cor(y,y_duga))^2;round(R_squared,4)
##       [,1]
## [1,] 0.386

Nilai R square sebesar 0.386 menunjukkan bahwa hanya sekitar38.6% dari variasi dalam y yang dapat dijelaskan oleh model regresi. Ini menunjukkan bahwa model kurang memiliki kecocokan dengan data.

R_squared_adj <- 1-((1-R_squared)*(n-1)/(n-p));round(R_squared_adj,4)
##        [,1]
## [1,] 0.3284

Nilai Adjusted R square sebesar 0.3284 menunjukkan terdapat pinalti untuk jumlah variabel sehingga model hanya mampu menjelaskan 32.84% dari variasi nilai y.

Uji F dan Std.Error parameter regresi

galat <- y-(b0+b1*data_liver$X1+b2*data_liver$X2+b4*data_liver$X4)
KTReg <- sum((y_duga-mean(y))^2)/(p-1);print(paste("KTR:",KTReg))
## [1] "KTR: 4876.19884277776"
KTG <- sum(galat^2)/(n-p);print(paste("KTG",KTG))
## [1] "KTG 727.147978975695"
Fhit <- KTReg/KTG;print(paste("Fhit:",round(Fhit,0)))
## [1] "Fhit: 7"
dbreg <- p-1;print(paste("dbr:",dbreg)) #Derajat bebas regresi
## [1] "dbr: 3"
dbg <- n-p;print(paste("dbg:",dbg)) #Derajat bebas galat
## [1] "dbg: 32"
pf(Fhit, dbreg, dbg, lower.tail = F)
## [1] 0.001223205
se_b <- sqrt(sigma_kuadrat[1]*solve(t(X)%*%X))
## Warning in sqrt(sigma_kuadrat[1] * solve(t(X) %*% X)): NaNs produced
se_b
##           x0                             
## x0 11.998293       NaN 1.1829188      NaN
##          NaN 0.8126577       NaN      NaN
##     1.182919       NaN 0.9055069      NaN
##          NaN       NaN       NaN 1.811898
se_b0 <- se_b[1,1];print(paste("se_b0:",round(se_b0,4)))
## [1] "se_b0: 11.9983"
se_b1 <- se_b[2,2];print(paste("se_b1:",round(se_b1,4)))
## [1] "se_b1: 0.8127"
se_b2 <- se_b[3,3];print(paste("se_b2:",round(se_b2,4)))
## [1] "se_b2: 0.9055"
se_b4 <- se_b[4,4];print(paste("se_b4:",round(se_b4,4)))
## [1] "se_b4: 1.8119"

Signifikansi Parameter (nilai-t)

t_b0 <- b0/se_b0;print(paste("t_b0:",round(t_b0,2)))
## [1] "t_b0: 10.18"
t_b1 <- b1/se_b1;print(paste("t_b1:",round(t_b1,2)))
## [1] "t_b1: 1.14"
t_b2 <- b2/se_b2;print(paste("t_b2:",round(t_b2,2)))
## [1] "t_b2: 0.68"
t_b4 <- b4/se_b4;print(paste("t_b4:",round(t_b4,2)))
## [1] "t_b4: 2.18"
print(paste("p_b0:",2*pt(-abs(t_b0),df <- n-p)))
## [1] "p_b0: 1.47506344832897e-11"
print(paste("p_b1:",2*pt(-abs(t_b1),df <- n-p)))
## [1] "p_b1: 0.263858037514289"
print(paste("p_b2:",2*pt(-abs(t_b2),df <- n-p)))
## [1] "p_b2: 0.499874045483506"
print(paste("p_b4:",2*pt(-abs(t_b4),df <- n-p)))
## [1] "p_b4: 0.0366193614765748"

Selang Kepercayaan (1-alfa)*100%

t <- qt(.975, df <- n-p)

BB_b0 <- b0-t*se_b0
BA_b0 <- b0+t*se_b0

BB_b1 <- b1-t*se_b1
BA_b1 <- b1+t*se_b1

BB_b2 <- b2-t*se_b2
BA_b2 <- b2+t*se_b2

BB_b4 <- b4-t*se_b4
BA_b4 <- b4+t*se_b4

Batas.Bawah <- as.matrix(c(round(BB_b0,6),round(BB_b1,6),round(BB_b2,6),round(BB_b4,6)))
Batas.Atas <- as.matrix(c(round(BA_b0,6),round(BA_b1,6),round(BA_b2,6),round(BA_b4,6)))
Fit <- (c(round(b0,6),round(b1,6),round(b2,6),round(b4,6)))

Selang.Kepercayaan <- cbind(Batas.Bawah, Fit, Batas.Atas)
colnames(Selang.Kepercayaan ) <- c("Batas bawah Selang (2.5%)", "Fit", "Batas atas Selang (97.5%)")
rownames(Selang.Kepercayaan ) <- c("Intersep", "b1", "b2","b4")
Selang.Kepercayaan
##          Batas bawah Selang (2.5%)        Fit Batas atas Selang (97.5%)
## Intersep                 97.660912 122.100635                146.540358
## b1                       -0.731095   0.924235                  2.579564
## b2                       -1.226507   0.617950                  2.462407
## b4                        0.261620   3.952337                  7.643053

Pembentukan Model (Fungsi lm())

reg <- lm(y ~ data_liver$X1 + data_liver$X2 + data_liver$X4, data=dt)
summary(reg)
## 
## Call:
## lm(formula = y ~ data_liver$X1 + data_liver$X2 + data_liver$X4, 
##     data = dt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -45.799 -16.805  -1.352   9.696  51.355 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   122.1006    11.9983  10.177 1.48e-11 ***
## data_liver$X1   0.9242     0.8127   1.137   0.2639    
## data_liver$X2   0.6180     0.9055   0.682   0.4999    
## data_liver$X4   3.9523     1.8119   2.181   0.0366 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.97 on 32 degrees of freedom
## Multiple R-squared:  0.386,  Adjusted R-squared:  0.3284 
## F-statistic: 6.706 on 3 and 32 DF,  p-value: 0.001223
anova(reg)
## Analysis of Variance Table
## 
## Response: y
##               Df  Sum Sq Mean Sq F value   Pr(>F)   
## data_liver$X1  1  8501.9  8501.9 11.6922 0.001731 **
## data_liver$X2  1  2666.8  2666.8  3.6674 0.064461 . 
## data_liver$X4  1  3459.9  3459.9  4.7582 0.036619 * 
## Residuals     32 23268.7   727.1                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(reg)
##                    2.5 %     97.5 %
## (Intercept)   97.6609117 146.540358
## data_liver$X1 -0.7310951   2.579564
## data_liver$X2 -1.2265068   2.462407
## data_liver$X4  0.2616205   7.643053

Perbandingan Manual (Matriks) dan Fungsi lm()

koef <- as.matrix(reg$coefficients)
penduga <- cbind(b, koef)
colnames(penduga) <- c('matriks','fungsi lm')
rownames(penduga) <- c("intersep", "b1", "b2", "b4")
penduga
##              matriks   fungsi lm
## intersep 122.1006350 122.1006350
## b1         0.9242345   0.9242345
## b2         0.6179503   0.6179503
## b4         3.9523366   3.9523366

Berdasarkan perbandingan ini diperoleh hasil yang sama yang berarti penggunaan matriks secara manual sudah tepat dalam memodelkan regresi linear berganda begitu pula sebaliknya.

```