library(readr)
# Membaca file Excel
data <- read_csv("C:/Users/salsa/Downloads/data liver (1).csv")
## Rows: 36 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (8): No, X1, X2, X3, X4, X5, X6, Y
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data)
## # A tibble: 6 × 8
## No X1 X2 X3 X4 X5 X6 Y
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.4 8.9 3.47 6.02 57.4 1.11 159.
## 2 2 26.7 21.2 3.53 12.1 61.4 1.36 197.
## 3 3 12.5 16.6 2 8.88 67.4 1.47 145.
## 4 4 8.45 22.9 6.71 7.46 69.9 1.31 140.
## 5 5 10.2 14.2 4.75 2.06 65.7 1.25 130.
## 6 6 19.5 17.4 1.95 7.54 59.6 1.14 163.
str(data)
## spc_tbl_ [36 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ No: num [1:36] 1 2 3 4 5 6 7 8 9 10 ...
## $ X1: num [1:36] 16.36 26.68 12.49 8.45 10.19 ...
## $ X2: num [1:36] 8.9 21.2 16.6 22.9 14.2 ...
## $ X3: num [1:36] 3.47 3.53 2 6.71 4.75 1.95 2.21 4.25 4.1 0.78 ...
## $ X4: num [1:36] 6.02 12.07 8.88 7.46 2.06 ...
## $ X5: num [1:36] 57.4 61.4 67.4 69.9 65.7 ...
## $ X6: num [1:36] 1.11 1.36 1.47 1.31 1.25 1.14 1.07 1.73 0.87 1.47 ...
## $ Y : num [1:36] 159 197 145 140 130 ...
## - attr(*, "spec")=
## .. cols(
## .. No = col_double(),
## .. X1 = col_double(),
## .. X2 = col_double(),
## .. X3 = col_double(),
## .. X4 = col_double(),
## .. X5 = col_double(),
## .. X6 = col_double(),
## .. Y = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
summary(data)
## No X1 X2 X3
## Min. : 1.00 Min. : 4.550 Min. : 1.270 Min. : 0.570
## 1st Qu.: 9.75 1st Qu.: 8.502 1st Qu.: 6.335 1st Qu.: 1.718
## Median :18.50 Median :12.265 Median : 9.220 Median : 2.240
## Mean :18.50 Mean :14.680 Mean :11.549 Mean : 3.070
## 3rd Qu.:27.25 3rd Qu.:19.810 3rd Qu.:15.207 3rd Qu.: 3.485
## Max. :36.00 Max. :35.410 Max. :36.360 Max. :14.230
## X4 X5 X6 Y
## Min. : 2.060 Min. :32.74 Min. :0.630 Min. :120.9
## 1st Qu.: 4.065 1st Qu.:52.76 1st Qu.:1.015 1st Qu.:143.9
## Median : 6.095 Median :58.13 Median :1.105 Median :160.7
## Mean : 6.811 Mean :58.33 Mean :1.141 Mean :169.7
## 3rd Qu.: 8.998 3rd Qu.:65.66 3rd Qu.:1.302 3rd Qu.:191.8
## Max. :15.000 Max. :79.09 Max. :1.730 Max. :247.4
print(class(df)) # Harusnya output: "data.frame"
## [1] "function"
str(df) # Periksa struktur kolom
## function (x, df1, df2, ncp, log = FALSE)
head(df) # Lihat contoh data
##
## 1 function (x, df1, df2, ncp, log = FALSE)
## 2 {
## 3 if (missing(ncp))
## 4 .Call(C_df, x, df1, df2, log)
## 5 else .Call(C_dnf, x, df1, df2, ncp, log)
## 6 }
model <- lm(Y ~ X1 + X2 + X4, data = data)
summary(model) # Menampilkan koefisien, R-squared, dan p-value
##
## Call:
## lm(formula = Y ~ X1 + X2 + X4, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.799 -16.805 -1.352 9.696 51.355
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.1006 11.9983 10.177 1.48e-11 ***
## X1 0.9242 0.8127 1.137 0.2639
## X2 0.6180 0.9055 0.682 0.4999
## X4 3.9523 1.8119 2.181 0.0366 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.97 on 32 degrees of freedom
## Multiple R-squared: 0.386, Adjusted R-squared: 0.3284
## F-statistic: 6.706 on 3 and 32 DF, p-value: 0.001223
df <- read.csv("C:/Users/salsa/Downloads/data liver (1).csv") # Ganti dengan path file yang benar
df
## No X1 X2 X3 X4 X5 X6 Y
## 1 1 16.36 8.90 3.47 6.02 57.42 1.11 158.76
## 2 2 26.68 21.22 3.53 12.07 61.38 1.36 197.19
## 3 3 12.49 16.62 2.00 8.88 67.42 1.47 144.73
## 4 4 8.45 22.86 6.71 7.46 69.94 1.31 140.06
## 5 5 10.19 14.23 4.75 2.06 65.68 1.25 129.71
## 6 6 19.53 17.35 1.95 7.54 59.63 1.14 162.59
## 7 7 20.65 10.48 2.21 4.88 59.42 1.07 178.48
## 8 8 22.96 14.23 4.25 3.69 75.08 1.73 120.90
## 9 9 21.22 21.64 4.10 11.94 43.42 0.87 191.24
## 10 10 8.11 3.16 0.78 8.82 75.12 1.47 150.03
## 11 11 24.74 7.84 1.68 3.68 57.65 1.08 173.44
## 12 12 11.38 15.71 3.56 7.20 39.93 0.74 211.98
## 13 13 15.82 15.04 2.40 9.89 51.27 1.02 193.49
## 14 14 8.36 9.01 2.01 3.40 50.52 0.94 164.04
## 15 15 12.04 9.72 2.27 6.03 51.60 1.05 156.97
## 16 16 10.97 4.58 1.73 5.55 56.63 1.03 208.36
## 17 17 7.97 9.33 0.57 4.17 79.09 1.61 154.62
## 18 18 7.46 6.11 1.73 2.99 57.20 1.07 137.38
## 19 19 29.09 15.71 3.41 9.35 56.44 1.10 180.15
## 20 20 10.30 8.54 2.32 10.78 60.43 1.17 228.47
## 21 21 7.82 4.41 1.07 4.19 59.52 1.00 153.62
## 22 22 14.71 6.29 1.77 6.16 65.05 1.30 121.31
## 23 23 8.54 6.73 1.27 5.52 65.65 1.17 157.37
## 24 24 23.05 11.34 5.39 3.00 33.57 0.63 211.27
## 25 25 13.12 5.86 1.89 10.92 52.93 0.90 178.16
## 26 26 7.41 9.11 2.05 5.50 53.72 0.91 174.89
## 27 27 14.59 5.59 1.26 3.75 58.62 1.14 142.98
## 28 28 8.52 6.52 1.00 6.92 56.61 1.11 165.59
## 29 29 18.97 6.35 2.94 5.61 56.41 1.07 141.54
## 30 30 35.41 36.36 14.23 15.00 41.52 0.89 238.22
## 31 31 4.55 1.27 3.13 2.83 70.91 1.27 138.42
## 32 32 22.59 28.70 10.51 10.35 32.74 0.66 247.45
## 33 33 9.21 4.55 1.19 7.92 72.20 1.34 140.27
## 34 34 18.32 11.61 2.91 8.07 52.23 1.02 216.06
## 35 35 5.69 6.88 1.18 2.78 72.12 1.39 144.18
## 36 36 11.21 11.92 3.31 10.29 60.65 1.69 156.22
df_baru <- df[, c("X1", "X2", "X4", "Y"), drop = FALSE]
head(df_baru)
## X1 X2 X4 Y
## 1 16.36 8.90 6.02 158.76
## 2 26.68 21.22 12.07 197.19
## 3 12.49 16.62 8.88 144.73
## 4 8.45 22.86 7.46 140.06
## 5 10.19 14.23 2.06 129.71
## 6 19.53 17.35 7.54 162.59
par(mfrow=c(1,3))
plot(df$X1, df$Y, main="Scatter Plot X1 vs Y", xlab="X1", ylab="Y", pch=19, col="blue")
plot(df$X2, df$Y, main="Scatter Plot X2 vs Y", xlab="X2", ylab="Y", pch=17, col="red")
plot(df$X4, df$Y, main="Scatter Plot X4 vs Y", xlab="X4", ylab="Y", pch=15, col="green")
# Perhitungan Manual
library(readr)
df <- read_csv("C:/Users/salsa/Downloads/data liver (1).csv")
## Rows: 36 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (8): No, X1, X2, X3, X4, X5, X6, Y
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(df)
## # A tibble: 6 × 8
## No X1 X2 X3 X4 X5 X6 Y
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 16.4 8.9 3.47 6.02 57.4 1.11 159.
## 2 2 26.7 21.2 3.53 12.1 61.4 1.36 197.
## 3 3 12.5 16.6 2 8.88 67.4 1.47 145.
## 4 4 8.45 22.9 6.71 7.46 69.9 1.31 140.
## 5 5 10.2 14.2 4.75 2.06 65.7 1.25 130.
## 6 6 19.5 17.4 1.95 7.54 59.6 1.14 163.
colnames(df_baru) <- c("x1", "x2", "x4", "y")
df_baru
## x1 x2 x4 y
## 1 16.36 8.90 6.02 158.76
## 2 26.68 21.22 12.07 197.19
## 3 12.49 16.62 8.88 144.73
## 4 8.45 22.86 7.46 140.06
## 5 10.19 14.23 2.06 129.71
## 6 19.53 17.35 7.54 162.59
## 7 20.65 10.48 4.88 178.48
## 8 22.96 14.23 3.69 120.90
## 9 21.22 21.64 11.94 191.24
## 10 8.11 3.16 8.82 150.03
## 11 24.74 7.84 3.68 173.44
## 12 11.38 15.71 7.20 211.98
## 13 15.82 15.04 9.89 193.49
## 14 8.36 9.01 3.40 164.04
## 15 12.04 9.72 6.03 156.97
## 16 10.97 4.58 5.55 208.36
## 17 7.97 9.33 4.17 154.62
## 18 7.46 6.11 2.99 137.38
## 19 29.09 15.71 9.35 180.15
## 20 10.30 8.54 10.78 228.47
## 21 7.82 4.41 4.19 153.62
## 22 14.71 6.29 6.16 121.31
## 23 8.54 6.73 5.52 157.37
## 24 23.05 11.34 3.00 211.27
## 25 13.12 5.86 10.92 178.16
## 26 7.41 9.11 5.50 174.89
## 27 14.59 5.59 3.75 142.98
## 28 8.52 6.52 6.92 165.59
## 29 18.97 6.35 5.61 141.54
## 30 35.41 36.36 15.00 238.22
## 31 4.55 1.27 2.83 138.42
## 32 22.59 28.70 10.35 247.45
## 33 9.21 4.55 7.92 140.27
## 34 18.32 11.61 8.07 216.06
## 35 5.69 6.88 2.78 144.18
## 36 11.21 11.92 10.29 156.22
\[ b_{(k+1)\times1}=(X'X)_{(k+1)\times(k+1)}^{-1}X'_{(k+1)\times{n}}y_{(n\times1)} \]
y <- as.matrix(df_baru$y)
X <- as.matrix(cbind(1, df_baru[, c("x1", "x2", "x4")]))
b <- solve(t(X) %*% X) %*% t(X) %*% y
b <- matrix(round(b, 4), ncol = 1)
b
## [,1]
## [1,] 122.1006
## [2,] 0.9242
## [3,] 0.6180
## [4,] 3.9523
n <- nrow(X)
n
## [1] 36
p <- ncol(X)
p
## [1] 4
b <- solve(t(X) %*% X) %*% t(X) %*% y
b <- as.vector(round(b, 4)) # Pastikan dalam bentuk vektor
df_koef <- data.frame(Variabel = c("Intercept", "X1", "X2", "X4"), Koefisien = b)
print(df_koef)
## Variabel Koefisien
## 1 Intercept 122.1006
## 2 X1 0.9242
## 3 X2 0.6180
## 4 X4 3.9523
\[ \hat{\sigma}^2=\frac{SSR_{esidual}}{n-p}=\frac{y'y-\hat{\beta}x'y}{n-p} \] ## Perhitungan Varians Residual dan Standard Error ### Koefisien regresi dengan Metode OLS
sigma_kuadrat <- as.numeric((t(y) %*% y - t(b) %*% t(X) %*% y) / (n - p))
Res_se <- sqrt(sigma_kuadrat)
cat("Residual Standard Error (Res_se):", round(Res_se, 3), "\n")
## Residual Standard Error (Res_se): 26.966
Nilai koefisien regresi menunjukkan bahwa, secara rata-rata, prediksi model menyimpang sekitar 26.966 unit dari nilai aktual y.
df <- n-p
df
## [1] 32
b0 <- b[1]
b1 <- b[2]
b2 <- b[3]
b3 <- b[4]
print(paste("b0:",b[1]))
## [1] "b0: 122.1006"
print(paste("b1:",b[2]))
## [1] "b1: 0.9242"
print(paste("b2:",b[3]))
## [1] "b2: 0.618"
print(paste("b3:",b[4]))
## [1] "b3: 3.9523"
y_duga <- b0 + b1 * df_baru$x1 + b2 * df_baru$x2 + b3 * df_baru$x4
y_duga
## [1] 166.5136 207.5765 179.0114 173.5217 148.4541 180.6729 166.9492 166.6984
## [9] 202.2761 166.4080 164.3549 170.7833 185.1044 148.8329 163.0673 157.0048
## [17] 151.7135 144.5885 195.6484 179.5034 148.6134 163.9290 155.9691 162.2684
## [25] 181.0067 156.3166 153.8604 161.3541 165.7294 236.5815 138.2756 201.6212
## [33] 164.7266 178.1020 142.5985 180.4966
Y <- data.frame(y = df_baru$y, y_duga)
head(Y)
## y y_duga
## 1 158.76 166.5136
## 2 197.19 207.5765
## 3 144.73 179.0114
## 4 140.06 173.5217
## 5 129.71 148.4541
## 6 162.59 180.6729
R_squared <- (cor(y,y_duga))^2;round(R_squared,4)
## [,1]
## [1,] 0.386
dbreg <- p-1
print(paste("dbr:", dbreg)) # Derajat bebas regresi
## [1] "dbr: 3"
dbg <- n-p
print(paste("dbg:", dbg)) # Derajat bebas galat
## [1] "dbg: 32"
KTReg <- sum((y_duga - mean(y))^2) / (p-1)
print(paste("KTR:", KTReg))
## [1] "KTR: 4876.20025265227"
galat <- y - (b0 + b1*df_baru$x1 + b2*df_baru$x2 + b3*df_baru$x4)
galat
## [,1]
## [1,] -7.753558
## [2,] -10.386477
## [3,] -34.281442
## [4,] -33.461728
## [5,] -18.744076
## [6,] -18.082868
## [7,] 11.530806
## [8,] -45.798359
## [9,] -11.036106
## [10,] -16.378028
## [11,] 9.085108
## [12,] 41.196664
## [13,] 8.385589
## [14,] 15.207088
## [15,] -6.097297
## [16,] 51.355221
## [17,] 2.906495
## [18,] -7.208489
## [19,] -15.498363
## [20,] 48.966626
## [21,] 5.006639
## [22,] -42.618970
## [23,] 1.400896
## [24,] 49.001570
## [25,] -2.846700
## [26,] 18.573448
## [27,] -10.880423
## [28,] 4.235940
## [29,] -24.189377
## [30,] 1.638498
## [31,] 0.144421
## [32,] 45.828817
## [33,] -24.456598
## [34,] 37.958015
## [35,] 1.581468
## [36,] -24.276609
KTG <- sum(dbreg^2) / (n-p)
print(paste("KTG", KTG))
## [1] "KTG 0.28125"
Fhit <- KTReg / KTG
print(paste("Fhit:", round(Fhit,2)))
## [1] "Fhit: 17337.6"
pf(Fhit, dbreg, dbg, lower.tail = F)
## [1] 1.926295e-51
se_b <- sqrt(sigma_kuadrat[1] * solve(t(X) %*% X))
## Warning in sqrt(sigma_kuadrat[1] * solve(t(X) %*% X)): NaNs produced
se_b
## 1 x1 x2 x4
## 1 11.998636 NaN 1.1829526 NaN
## x1 NaN 0.8126809 NaN NaN
## x2 1.182953 NaN 0.9055327 NaN
## x4 NaN NaN NaN 1.81195
se_b0 <- se_b[1,1]
print(paste("se_b0:", round(se_b0,4)))
## [1] "se_b0: 11.9986"
se_b1 <- se_b[2,2]
print(paste("se_b1:", round(se_b1,4)))
## [1] "se_b1: 0.8127"
se_b2 <- se_b[3,3]
print(paste("se_b2:", round(se_b2,4)))
## [1] "se_b2: 0.9055"
se_b3 <- se_b [4,4]
print(paste("se_b3:", round(se_b3,4)))
## [1] "se_b3: 1.812"
t_b0 <- b0/se_b0;print(paste("t_b0:",round(t_b0,2)))
## [1] "t_b0: 10.18"
t_b1 <- b1/se_b1;print(paste("t_b1:",round(t_b1,2)))
## [1] "t_b1: 1.14"
t_b2 <- b2/se_b2;print(paste("t_b2:",round(t_b2,2)))
## [1] "t_b2: 0.68"
t_b3 <- b3/se_b3;print(paste("t_b3:",round(t_b3,2)))
## [1] "t_b3: 2.18"
print(paste("p_b0:", 2*pt(-abs(t_b0), df = n-p)))
## [1] "p_b0: 1.47611297300314e-11"
print(paste("p_b1:", 2*pt(-abs(t_b1), df = n-p)))
## [1] "p_b1: 0.263888899097137"
print(paste("p_b2:", 2*pt(-abs(t_b2), df = n-p)))
## [1] "p_b2: 0.499851987791175"
print(paste("p_b3:", 2*pt(-abs(t_b3), df = n-p)))
## [1] "p_b3: 0.0366259937750951"
t <- qt(.975, df <- n-p)
BB_b0 <- b0-t*se_b0
BA_b0 <- b0+t*se_b0
BB_b1 <- b1-t*se_b1
BA_b1 <- b1+t*se_b1
BB_b2 <- b2-t*se_b2
BA_b2 <- b2+t*se_b2
BB_b3 = b3-t*se_b3
BA_b3 = b3+t*se_b3
Batas.Bawah <- as.matrix(c(round(BB_b0,6),round(BB_b1,6),round(BB_b2,6),(round(BB_b3,6))))
Batas.Atas <- as.matrix(c(round(BA_b0,6),round(BA_b1,6),round(BA_b2,6),(round(BB_b3,6))))
Fit <- (c(round(b0,6),round(b1,6),round(b2,6),round(BB_b3,6)))
Selang.Kepercayaan <- cbind(Batas.Bawah, b, Batas.Atas)
colnames(Selang.Kepercayaan) <- c("Batas bawah Selang (2.5%)", "Fit", "Batas atas Selang (97.5%)")
rownames(Selang.Kepercayaan) <- c("Intersep", "b1", "b2", "b3") # b3 untuk X4
Selang.Kepercayaan
## Batas bawah Selang (2.5%) Fit Batas atas Selang (97.5%)
## Intersep 97.660179 122.1006 146.541021
## b1 -0.731177 0.9242 2.579577
## b2 -1.226510 0.6180 2.462510
## b3 0.261478 3.9523 0.261478
reg <- lm(y ~ x1 + x2 + x4, data = df_baru)
summary(reg)
##
## Call:
## lm(formula = y ~ x1 + x2 + x4, data = df_baru)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.799 -16.805 -1.352 9.696 51.355
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 122.1006 11.9983 10.177 1.48e-11 ***
## x1 0.9242 0.8127 1.137 0.2639
## x2 0.6180 0.9055 0.682 0.4999
## x4 3.9523 1.8119 2.181 0.0366 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.97 on 32 degrees of freedom
## Multiple R-squared: 0.386, Adjusted R-squared: 0.3284
## F-statistic: 6.706 on 3 and 32 DF, p-value: 0.001223
anova(reg)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 8501.9 8501.9 11.6922 0.001731 **
## x2 1 2666.8 2666.8 3.6674 0.064461 .
## x4 1 3459.9 3459.9 4.7582 0.036619 *
## Residuals 32 23268.7 727.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(reg)
## 2.5 % 97.5 %
## (Intercept) 97.6609117 146.540358
## x1 -0.7310951 2.579564
## x2 -1.2265068 2.462407
## x4 0.2616205 7.643053
Selang kepercayaan 95% menunjukkan perkiraan rentang untuk setiap koefisien. x1 dan x2 mencakup nol, sehingga pengaruhnya tidak pasti. x4 memiliki selang positif, menunjukkan pengaruh yang lebih signifikan terhadap y. ## Perbandingan manual (matriks) dan fungsi lm()
koef <- as.matrix(reg$coefficients)
penduga <- cbind(b, koef)
colnames(penduga) <- c('matriks', 'fungsi lm')
rownames(penduga) <- c("intersep", "b1", "b2", "b3")
penduga
## matriks fungsi lm
## intersep 122.1006 122.1006350
## b1 0.9242 0.9242345
## b2 0.6180 0.6179503
## b3 3.9523 3.9523366
Berdasarkan perbandingan yang telah dilakukan menunjukkan hasil yang sama bahwa penggunaan matriks manual sudah tepat dalam memodelkan regresi linear berganda. ## Perbandingan 2 Model
# Model 1
a <- summary(reg)
r.sq1 <- a$r.squared
adj_r.sq1 <- a$adj.r.squared
se1 <- a$sigma
# Model 2
reg2 <- lm(y~.-1, data = df_baru)
b <- summary(reg2)
r.sq2 <- b$r.squared
adj_r.sq2 <- b$adj.r.squared
se2 <- b$sigma
# Membandingkan kebaikan model
model1 <- as.matrix(c(r.sq1,adj_r.sq1,se1))
model2 <- as.matrix(c(r.sq2,adj_r.sq2,se2))
tabel <- cbind(model1, model2)
colnames(tabel) <- c("Model 1", "Model 2")
rownames(tabel) <- c("R-Square", "Adj R-Square", "Standar Error Sisaan")
tabel
## Model 1 Model 2
## R-Square 0.386006 0.9082997
## Adj R-Square 0.328444 0.8999633
## Standar Error Sisaan 26.965682 54.6540148
Perbandingan dua model regresi menunjukkan bahwa Adjusted R-Square (Adj. R²) lebih baik dalam menilai model dibandingkan R-Square (R²) karena mempertimbangkan jumlah variabel. Model 2 memiliki Adj. R² lebih tinggi (0.8999 vs. 0.3284), menunjukkan model ini lebih baik. Namun, Standar Error Sisaan juga perlu diperhatikan, karena semakin kecil nilainya, semakin baik model dalam memprediksi data.