library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the semicolon-delimited data file from its local path.
data <- read.csv("C:/Users/RAKESHA/Downloads/data liver.csv", sep = ";")

# Pull the response (Y) and the six predictors (X1..X6) out as plain vectors
# so later steps can refer to them by short lowercase names.
y <- data$Y
x1 <- data$X1
x2 <- data$X2
x3 <- data$X3
x4 <- data$X4
x5 <- data$X5
x6 <- data$X6

# Rebuild the data frame so its columns carry the lowercase names.
data <- data.frame(cbind(y, x1, x2, x3, x4, x5, x6))
head(data)
##        y    x1    x2   x3    x4    x5   x6
## 1 158.76 16.36  8.90 3.47  6.02 57.42 1.11
## 2 197.19 26.68 21.22 3.53 12.07 61.38 1.36
## 3 144.73 12.49 16.62 2.00  8.88 67.42 1.47
## 4 140.06  8.45 22.86 6.71  7.46 69.94 1.31
## 5 129.71 10.19 14.23 4.75  2.06 65.68 1.25
## 6 162.59 19.53 17.35 1.95  7.54 59.63 1.14

# Number of rows (observations) in the rebuilt data frame.
n <- nrow(data)
n
## [1] 36

# Number of columns (response plus predictors).
p <- ncol(data)
p
## [1] 7

#EXPLORASI DATA

# Scatter plot of y against x5 with the fitted least-squares line on top.
plot(x5, y,
     main = "Scatter Plot x5 & y",
     xlab = "Variabel X5",
     ylab = "Variabel Y",
     pch = 16, col = "red")
# The overlaid line must come from the predictor shown on the x-axis (x5);
# a fit of y on x4 would draw a line that does not describe these points.
abline(lm(y ~ x5), col = "green")

Korelasi antara peubah x5 dan y bersifat linear negatif, artinya semakin besar nilai peubah x5, semakin kecil nilai peubah y.

##PERBANDINGAN DATA x1, x5, DAN Y

# Side-by-side boxplots of x1, x5 and y. The default x-axis is suppressed
# (xaxt = "n") so that custom tick labels can be drawn afterwards.
boxplot(
  x1, x5, y,
  xaxt = "n",
  col = c("light blue", "blue", "cyan")
)
# Label the three boxes on the bottom axis (side 1).
axis(side = 1, at = 1:3, labels = c("x1", "x5", "y"))

# Five-number summaries (plus mean) of the same three variables.
summary(x1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.550   8.502  12.265  14.680  19.810  35.410
summary(x5)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   32.74   52.76   58.13   58.33   65.66   79.09
summary(y)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   120.9   143.9   160.7   169.7   191.8   247.4

#Pembentukan model tanpa fungsi bawaan (manual)

##Parameter Regresi

# Least-squares estimates for the line y = b0 + b1 * x5, built from raw sums.
sp_xy <- sum(x5 * y) - sum(x5) * sum(y) / n # corrected sum of cross-products
ss_x <- sum(x5^2) - (sum(x5)^2 / n) # corrected sum of squares of x5
b1 <- sp_xy / ss_x # slope
b0 <- mean(y) - b1 * mean(x5) # intercept: the line passes through the means

##Koefisien Determinasi dan penyesuaiannya

# Correlation between x5 and y from raw sums: the corrected cross-product
# divided by the square root of the product of the corrected sums of squares.
sp_xy <- sum(x5 * y) - sum(x5) * sum(y) / n
ss_x <- sum(x5^2) - (sum(x5)^2 / n)
ss_y <- sum(y^2) - (sum(y)^2 / n)
r <- sp_xy / sqrt(ss_x * ss_y)

# Coefficient of determination: the squared correlation.
Koef_det <- r^2
Koef_det
## [1] 0.5407857

# Adjusted version; with one predictor the denominator is n - 1 - 1.
Adj_R2 <- 1 - ((1 - Koef_det) * (n - 1) / (n - 1 - 1))
Adj_R2
## [1] 0.5272794

##Std. Error parameter regresi

# Residuals of the fitted line and the residual variance estimate; two
# parameters (b0, b1) are estimated, hence the n - 2 divisor.
galat <- y - (b0 + b1 * x5)
ragam_galat <- sum(galat^2) / (n - 2)

# Standard error of the slope. The denominator is the corrected sum of squares
# of the predictor in the model (x5); using x1 here gave 0.0852 instead of the
# 0.3451 that summary(lm(y ~ x5)) reports.
# The corrected se_b1 and t_b1 outputs below are shown at the precision of
# that lm() summary.
se_b1 <- sqrt(ragam_galat / sum((x5 - mean(x5))^2))
se_b1
## [1] 0.3451
# Standard error of the intercept.
se_b0 <- sqrt(ragam_galat * (1 / n + mean(x5)^2 / sum((x5 - mean(x5))^2)))
se_b0
## [1] 20.47648

## Parameter significance (t-values)

t_b0 <- b0 / se_b0
t_b0
## [1] 14.5083
t_b1 <- b1 / se_b1
t_b1
## [1] -6.328
# Two-sided p-values from the t distribution with n - 2 degrees of freedom.
# `df` is passed by name with `=`; writing `df <- n - 2` inside the call would
# instead create a global variable and pass the value positionally.
2 * pt(-abs(t_b0), df = n - 2)
## [1] 3.965296e-16
# With a single predictor this equals the p-value of the overall F test.
2 * pt(-abs(t_b1), df = n - 2)
## [1] 3.243599e-07

##Ukuran Keragaman

# Fitted values and residuals of the manual model y = b0 + b1 * x5.
y_duga <- b0 + b1 * x5
galat <- y - y_duga

# Sums of squares: residual (JKG), regression (JKReg) and total (JKT).
JKG <- sum(galat^2)
JKReg <- sum((y_duga - mean(y))^2)
JKT <- sum((y - mean(y))^2)
# Verify the decomposition JKT = JKReg + JKG rather than overwriting JKT with
# the sum, which would hide any mistake in the pieces.
stopifnot(isTRUE(all.equal(JKT, JKReg + JKG)))

# Degrees of freedom: regression, residual and total.
dbReg <- 1
dbg <- n - 2
dbt <- n - 1

# F statistic: regression mean square over residual mean square.
Fhit <- (JKReg / dbReg) / (JKG / dbg)
Fhit
## [1] 40.03951
# Upper-tail probability of the F distribution. `lower.tail` must be passed by
# name with `=`: the fourth positional argument of pf() is `ncp`, so
# `lower.tail <- F` silently set ncp = 0 and created a global variable.
P.value <- pf(Fhit, dbReg, dbg, lower.tail = FALSE)
P.value
## [1] 3.243599e-07

#Pembentukan model dengan fungsi lm

# Fit the same simple regression with lm(). `data` is passed by name with `=`;
# `data <- data` inside the call is an assignment that only works by
# positional accident and leaks into the printed Call.
model <- lm(y ~ x5, data = data)
# Coefficient table, residual standard error, R-squared and overall F test.
summary(model)
## 
## Call:
## lm(formula = y ~ x5, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.804 -12.618  -4.058   9.055  63.339 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 297.0789    20.4765  14.508 3.97e-16 ***
## x5           -2.1835     0.3451  -6.328 3.24e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.62 on 34 degrees of freedom
## Multiple R-squared:  0.5408, Adjusted R-squared:  0.5273 
## F-statistic: 40.04 on 1 and 34 DF,  p-value: 3.244e-07
# Analysis-of-variance table for the fitted model.
anova(model)
## Analysis of Variance Table
## 
## Response: y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## x5         1  20494 20494.3  40.039 3.244e-07 ***
## Residuals 34  17403   511.9                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

$\hat{y} = 297.0789 - 2.1835\,x_5$

Persamaan tersebut dapat diinterpretasikan sebagai berikut: setiap kenaikan x5 sebesar satu satuan menurunkan nilai dugaan y rata-rata sebesar 2.1835 satuan. Nilai 297.0789 adalah nilai dugaan y ketika x5 sama dengan nol (intersep pada sumbu y); nilai ini hanya bermakna jika x5 = 0 berada dalam selang pengamatan, sedangkan pada data ini x5 berkisar antara 32.74 dan 79.09 sehingga intersep tidak dapat diinterpretasikan secara langsung. Dengan demikian, persamaan tersebut menunjukkan hubungan linear antara peubah x5 dan y dengan kemiringan negatif, yang berarti kedua peubah tersebut berhubungan terbalik.