library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
X0 <- rep(1,36)
data_liver <- read.csv("C:/Users/lenovo/OneDrive/ドキュメント/anreg/Minggu 4/data liver.csv")
data_liver$X0 <- rep(1,36)
data_liver2 <- data_liver %>% select("Y", "X0", "X1", "X2", "X4")
Y <- data_liver$Y
X1 <- data_liver$X1
X2 <- data_liver$X2
X4 <- data_liver$X4
par(mfrow=c(1,3))
plot(X1, Y, pch=19)
plot(X2, Y, pch=19)
plot(X4, Y, pch=19)
y <- as.matrix(Y)
x <- as.matrix(cbind(X0,X1,X2,X4))
b <- solve(t(x)%*%x)%*%t(x)%*%y;round(b,4)
## [,1]
## X0 122.1006
## X1 0.9242
## X2 0.6180
## X4 3.9523
b0 <- b[1]
b1 <- b[2]
b2 <- b[3]
b3 <- b[4]
print(paste("b0:",b[1]))
## [1] "b0: 122.1006349907"
print(paste("b1:",b[2]))
## [1] "b1: 0.924234526233408"
print(paste("b2:",b[3]))
## [1] "b2: 0.617950334052231"
print(paste("b3:",b[4]))
## [1] "b3: 3.95233664011293"
n <- nrow(data_liver2)
p <- ncol(data_liver2) - 1
sigma_kuadrat <- (t(y)%*%y-t(b)%*%t(x)%*%y)/(n-p)
Res_se <- sqrt(sigma_kuadrat)
round(Res_se,3)
## [,1]
## [1,] 26.966
nilai koefisien regresi menunjukkan bahwa, secara rata-rata, prediksi model menyimpang sekitar 26.966 unit dari nilai aktual \(y\).
df <- n-p
df
## [1] 32
y_duga <- b0+b1*X1+b2*X2+b3*X4
Y <- data.frame(y,y_duga);head(Y)
## y y_duga
## 1 158.76 166.5139
## 2 197.19 207.5768
## 3 144.73 179.0114
## 4 140.06 173.5212
## 5 129.71 148.4538
## 6 162.59 180.6730
R_squared <- (cor(y,y_duga))^2;round(R_squared,4)
## [,1]
## [1,] 0.386
Nilai \(R^2\) sebesar 0.386 menunjukkan bahwa sekitar 38.6% dari variasi dalam \(y\) dapat dijelaskan oleh model regresi. Ini menunjukkan bahwa model memiliki kecocokan yang tidak terlalu baik dengan data.
R_squared_adj <- 1-((1-R_squared)*(n-1)/(n-p));round(R_squared_adj,4)
## [,1]
## [1,] 0.3284
Nilai adjusted \(R^2\) sebesar 0.3284 menunjukkan bahwa meskipun ada penalti untuk jumlah variabel, model masih mampu menjelaskan 32.84% dari variasi dalam \(y\).
galat <- y-(b0+b1*X1+b2*X2+b3*X4)
KTReg <- sum((y_duga-mean(y))^2)/(p-1) ;print(paste("KTR:",KTReg))
## [1] "KTR: 4876.19884277776"
KTG <- sum(galat^2)/(n-p) ;print(paste("KTG",KTG))
## [1] "KTG 727.147978975695"
Fhit <- KTReg/KTG;print(paste("Fhit:",round(Fhit,0)))
## [1] "Fhit: 7"
dbreg <- p-1;print(paste("dbr:",dbreg))
## [1] "dbr: 3"
dbg <- n-p;print(paste("dbg:",dbg))
## [1] "dbg: 32"
pf(Fhit, dbreg, dbg, lower.tail = F)
## [1] 0.001223205
se_b <- sqrt(sigma_kuadrat[1]*solve(t(x)%*%x))
se_b
## X0 X1 X2 X4
## X0 11.998293 NaN 1.1829188 NaN
## X1 NaN 0.8126577 NaN NaN
## X2 1.182919 NaN 0.9055069 NaN
## X4 NaN NaN NaN 1.811898
se_b0 <- se_b[1,1];print(paste("se_b0:",round(se_b0,4)))
## [1] "se_b0: 11.9983"
se_b1 <- se_b[2,2];print(paste("se_b1:",round(se_b1,4)))
## [1] "se_b1: 0.8127"
se_b2 <- se_b[3,3];print(paste("se_b2:",round(se_b2,4)))
## [1] "se_b2: 0.9055"
se_b3 <- se_b[4,4];print(paste("se_b3:",round(se_b3,4)))
## [1] "se_b3: 1.8119"
t_b0 <- b0/se_b0;print(paste("t_b0:",round(t_b0,2)))
## [1] "t_b0: 10.18"
t_b1 <- b1/se_b1;print(paste("t_b1:",round(t_b1,2)))
## [1] "t_b1: 1.14"
t_b2 <- b2/se_b2;print(paste("t_b2:",round(t_b2,2)))
## [1] "t_b2: 0.68"
t_b3 <- b3/se_b3;print(paste("t_b3:",round(t_b3,2)))
## [1] "t_b3: 2.18"
print(paste("p_b0:",2*pt(-abs(t_b0),df <- n-p)))
## [1] "p_b0: 1.47506344832897e-11"
print(paste("p_b1:",2*pt(-abs(t_b1),df <- n-p)))
## [1] "p_b1: 0.263858037514289"
print(paste("p_b2:",2*pt(-abs(t_b2),df <- n-p)))
## [1] "p_b2: 0.499874045483506"
print(paste("p_b3:",2*pt(-abs(t_b3),df <- n-p)))
## [1] "p_b3: 0.0366193614765748"
t <- qt(.975, df <- n-p)
BB_b0 <- b0-t*se_b0
BA_b0 <- b0+t*se_b0
BB_b1 <- b1-t*se_b1
BA_b1 <- b1+t*se_b1
BB_b2 <- b2-t*se_b2
BA_b2 <- b2+t*se_b2
BB_b3 <- b3-t*se_b3
BA_b3 <- b3+t*se_b3
Batas.Bawah <- as.matrix(c(round(BB_b0,6),round(BB_b1,6),round(BB_b2,6),round(BB_b3,6)))
Batas.Atas <- as.matrix(c(round(BA_b0,6),round(BA_b1,6),round(BA_b2,6),round(BA_b3,6)))
Fit <- (c(round(b0,6),round(b1,6),round(b2,6),round(b3,6)))
Selang.Kepercayaan <- cbind(Batas.Bawah, Fit, Batas.Atas)
colnames(Selang.Kepercayaan ) <- c("Batas bawah Selang (2.5%)", "Fit", "Batas atas Selang (97.5%)")
rownames(Selang.Kepercayaan ) <- c("Intersep", "b1", "b2", "b3")
Selang.Kepercayaan
## Batas bawah Selang (2.5%) Fit Batas atas Selang (97.5%)
## Intersep 97.660912 122.100635 146.540358
## b1 -0.731095 0.924235 2.579564
## b2 -1.226507 0.617950 2.462407
## b3 0.261620 3.952337 7.643053
reg <- lm(y~X1+X2+X4, data=data_liver2)
summary(reg)
##
## Call:
## lm(formula = y ~ X1 + X2 + X4, data = data_liver2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.799 -16.805 -1.352 9.696 51.355
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.1006 11.9983 10.177 1.48e-11 ***
## X1 0.9242 0.8127 1.137 0.2639
## X2 0.6180 0.9055 0.682 0.4999
## X4 3.9523 1.8119 2.181 0.0366 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.97 on 32 degrees of freedom
## Multiple R-squared: 0.386, Adjusted R-squared: 0.3284
## F-statistic: 6.706 on 3 and 32 DF, p-value: 0.001223
anova(reg)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## X1 1 8501.9 8501.9 11.6922 0.001731 **
## X2 1 2666.8 2666.8 3.6674 0.064461 .
## X4 1 3459.9 3459.9 4.7582 0.036619 *
## Residuals 32 23268.7 727.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(reg)
## 2.5 % 97.5 %
## (Intercept) 97.6609117 146.540358
## X1 -0.7310951 2.579564
## X2 -1.2265068 2.462407
## X4 0.2616205 7.643053
koef <- as.matrix(reg$coefficients)
penduga <- cbind(b, koef)
colnames(penduga) <- c('matriks', 'fungsi lm')
rownames(penduga) <- c("intersep", "b1", "b2", "b3")
penduga
## matriks fungsi lm
## intersep 122.1006350 122.1006350
## b1 0.9242345 0.9242345
## b2 0.6179503 0.6179503
## b3 3.9523366 3.9523366
Berdasar perbandingan ini diperoleh hasil yang sama yang artinya penggunaan matriks secara manual sudah tepat dalam memodelkan regresi linear berganda bergitu pula sebaliknya.