data_liver <- read.csv("C:/Users/FAQIH/Downloads/data liver.csv")
head(data_liver)
## No X1 X2 X3 X4 X5 X6 Y
## 1 1 16.36 8.90 3.47 6.02 57.42 1.11 158.76
## 2 2 26.68 21.22 3.53 12.07 61.38 1.36 197.19
## 3 3 12.49 16.62 2.00 8.88 67.42 1.47 144.73
## 4 4 8.45 22.86 6.71 7.46 69.94 1.31 140.06
## 5 5 10.19 14.23 4.75 2.06 65.68 1.25 129.71
## 6 6 19.53 17.35 1.95 7.54 59.63 1.14 162.59
n <- 36
x0 <- rep(1,n)
dt <- data.frame(data_liver$Y,x0,data_liver$X1,data_liver$X2,data_liver$X4)
head(dt)
## data_liver.Y x0 data_liver.X1 data_liver.X2 data_liver.X4
## 1 158.76 1 16.36 8.90 6.02
## 2 197.19 1 26.68 21.22 12.07
## 3 144.73 1 12.49 16.62 8.88
## 4 140.06 1 8.45 22.86 7.46
## 5 129.71 1 10.19 14.23 2.06
## 6 162.59 1 19.53 17.35 7.54
par(mfrow=c(1,2))
plot(data_liver$X1, data_liver$Y, pch=19)
plot(data_liver$X2, data_liver$Y, pch=19)
plot(data_liver$X4, data_liver$Y, pch=19)
Dari ketiga plot eksplorasi data diatas memberikan gambaran
kecenderungan korelasi antar peubah y dengan tiap variabel x menunjukkan
tren positif namun dengan keragaman yang besar.
y <- as.matrix(data_liver$Y)
X <- as.matrix(cbind(x0,data_liver$X1,data_liver$X2,data_liver$X4))
b <- solve(t(X)%*%X)%*%t(X)%*%y;round(b,4)
## [,1]
## x0 122.1006
## 0.9242
## 0.6180
## 3.9523
b0 <- b[1]
b1 <- b[2]
b2 <- b[3]
b4 <- b[4]
print(paste("b0:",b[1]))
## [1] "b0: 122.1006349907"
print(paste("b1:",b[2]))
## [1] "b1: 0.924234526233408"
print(paste("b2:",b[3]))
## [1] "b2: 0.617950334052231"
print(paste("b4:",b[4]))
## [1] "b4: 3.95233664011293"
n<-36
p<-4
sigma_kuadrat <- (t(y)%*%y-t(b)%*%t(X)%*%y)/(n-p)
Res_se <- sqrt(sigma_kuadrat)
round(Res_se,3)
## [,1]
## [1,] 26.966
Nilai koefisien regresi menunjukkan, secara rata - rata, prediksi model menyimpang sekitar 26.966 unit dari nilai aktual y.
df <- n-p
df
## [1] 32
y_duga <- b0+b1*data_liver$X1+b2*data_liver$X2+b4*data_liver$X4
Y <- data.frame(y,y_duga);head(Y)
## y y_duga
## 1 158.76 166.5139
## 2 197.19 207.5768
## 3 144.73 179.0114
## 4 140.06 173.5212
## 5 129.71 148.4538
## 6 162.59 180.6730
R_squared <- (cor(y,y_duga))^2;round(R_squared,4)
## [,1]
## [1,] 0.386
Nilai R square sebesar 0.386 menunjukkan bahwa hanya sekitar38.6% dari variasi dalam y yang dapat dijelaskan oleh model regresi. Ini menunjukkan bahwa model kurang memiliki kecocokan dengan data.
R_squared_adj <- 1-((1-R_squared)*(n-1)/(n-p));round(R_squared_adj,4)
## [,1]
## [1,] 0.3284
Nilai Adjusted R square sebesar 0.3284 menunjukkan terdapat pinalti untuk jumlah variabel sehingga model hanya mampu menjelaskan 32.84% dari variasi nilai y.
galat <- y-(b0+b1*data_liver$X1+b2*data_liver$X2+b4*data_liver$X4)
KTReg <- sum((y_duga-mean(y))^2)/(p-1);print(paste("KTR:",KTReg))
## [1] "KTR: 4876.19884277776"
KTG <- sum(galat^2)/(n-p);print(paste("KTG",KTG))
## [1] "KTG 727.147978975695"
Fhit <- KTReg/KTG;print(paste("Fhit:",round(Fhit,0)))
## [1] "Fhit: 7"
dbreg <- p-1;print(paste("dbr:",dbreg)) #Derajat bebas regresi
## [1] "dbr: 3"
dbg <- n-p;print(paste("dbg:",dbg)) #Derajat bebas galat
## [1] "dbg: 32"
pf(Fhit, dbreg, dbg, lower.tail = F)
## [1] 0.001223205
se_b <- sqrt(sigma_kuadrat[1]*solve(t(X)%*%X))
## Warning in sqrt(sigma_kuadrat[1] * solve(t(X) %*% X)): NaNs produced
se_b
## x0
## x0 11.998293 NaN 1.1829188 NaN
## NaN 0.8126577 NaN NaN
## 1.182919 NaN 0.9055069 NaN
## NaN NaN NaN 1.811898
se_b0 <- se_b[1,1];print(paste("se_b0:",round(se_b0,4)))
## [1] "se_b0: 11.9983"
se_b1 <- se_b[2,2];print(paste("se_b1:",round(se_b1,4)))
## [1] "se_b1: 0.8127"
se_b2 <- se_b[3,3];print(paste("se_b2:",round(se_b2,4)))
## [1] "se_b2: 0.9055"
se_b4 <- se_b[4,4];print(paste("se_b4:",round(se_b4,4)))
## [1] "se_b4: 1.8119"
t_b0 <- b0/se_b0;print(paste("t_b0:",round(t_b0,2)))
## [1] "t_b0: 10.18"
t_b1 <- b1/se_b1;print(paste("t_b1:",round(t_b1,2)))
## [1] "t_b1: 1.14"
t_b2 <- b2/se_b2;print(paste("t_b2:",round(t_b2,2)))
## [1] "t_b2: 0.68"
t_b4 <- b4/se_b4;print(paste("t_b4:",round(t_b4,2)))
## [1] "t_b4: 2.18"
print(paste("p_b0:",2*pt(-abs(t_b0),df <- n-p)))
## [1] "p_b0: 1.47506344832897e-11"
print(paste("p_b1:",2*pt(-abs(t_b1),df <- n-p)))
## [1] "p_b1: 0.263858037514289"
print(paste("p_b2:",2*pt(-abs(t_b2),df <- n-p)))
## [1] "p_b2: 0.499874045483506"
print(paste("p_b4:",2*pt(-abs(t_b4),df <- n-p)))
## [1] "p_b4: 0.0366193614765748"
t <- qt(.975, df <- n-p)
BB_b0 <- b0-t*se_b0
BA_b0 <- b0+t*se_b0
BB_b1 <- b1-t*se_b1
BA_b1 <- b1+t*se_b1
BB_b2 <- b2-t*se_b2
BA_b2 <- b2+t*se_b2
BB_b4 <- b4-t*se_b4
BA_b4 <- b4+t*se_b4
Batas.Bawah <- as.matrix(c(round(BB_b0,6),round(BB_b1,6),round(BB_b2,6),round(BB_b4,6)))
Batas.Atas <- as.matrix(c(round(BA_b0,6),round(BA_b1,6),round(BA_b2,6),round(BA_b4,6)))
Fit <- (c(round(b0,6),round(b1,6),round(b2,6),round(b4,6)))
Selang.Kepercayaan <- cbind(Batas.Bawah, Fit, Batas.Atas)
colnames(Selang.Kepercayaan ) <- c("Batas bawah Selang (2.5%)", "Fit", "Batas atas Selang (97.5%)")
rownames(Selang.Kepercayaan ) <- c("Intersep", "b1", "b2","b4")
Selang.Kepercayaan
## Batas bawah Selang (2.5%) Fit Batas atas Selang (97.5%)
## Intersep 97.660912 122.100635 146.540358
## b1 -0.731095 0.924235 2.579564
## b2 -1.226507 0.617950 2.462407
## b4 0.261620 3.952337 7.643053
reg <- lm(y ~ data_liver$X1 + data_liver$X2 + data_liver$X4, data=dt)
summary(reg)
##
## Call:
## lm(formula = y ~ data_liver$X1 + data_liver$X2 + data_liver$X4,
## data = dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.799 -16.805 -1.352 9.696 51.355
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.1006 11.9983 10.177 1.48e-11 ***
## data_liver$X1 0.9242 0.8127 1.137 0.2639
## data_liver$X2 0.6180 0.9055 0.682 0.4999
## data_liver$X4 3.9523 1.8119 2.181 0.0366 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.97 on 32 degrees of freedom
## Multiple R-squared: 0.386, Adjusted R-squared: 0.3284
## F-statistic: 6.706 on 3 and 32 DF, p-value: 0.001223
anova(reg)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## data_liver$X1 1 8501.9 8501.9 11.6922 0.001731 **
## data_liver$X2 1 2666.8 2666.8 3.6674 0.064461 .
## data_liver$X4 1 3459.9 3459.9 4.7582 0.036619 *
## Residuals 32 23268.7 727.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(reg)
## 2.5 % 97.5 %
## (Intercept) 97.6609117 146.540358
## data_liver$X1 -0.7310951 2.579564
## data_liver$X2 -1.2265068 2.462407
## data_liver$X4 0.2616205 7.643053
koef <- as.matrix(reg$coefficients)
penduga <- cbind(b, koef)
colnames(penduga) <- c('matriks','fungsi lm')
rownames(penduga) <- c("intersep", "b1", "b2", "b4")
penduga
## matriks fungsi lm
## intersep 122.1006350 122.1006350
## b1 0.9242345 0.9242345
## b2 0.6179503 0.6179503
## b4 3.9523366 3.9523366
Berdasarkan perbandingan ini diperoleh hasil yang sama yang berarti penggunaan matriks secara manual sudah tepat dalam memodelkan regresi linear berganda begitu pula sebaliknya.
```