Inferensi Regresi Linier Sederhana

Data

Data yang digunakan dalam praktikum ini adalah data Perusahaan Toluca.

Perusahaan Toluca ingin memprediksi lama waktu kerja (WorkHours) yang dibutuhkan berdasarkan banyaknya jumlah produksi yang dikerjakan (LotSize).

library(readxl)
# Read the Toluca data from the Excel workbook into a tibble.
toluca <- read_excel(path = "D:/toluca.xlsx")
# Preview the first six rows to confirm the import.
head(x = toluca, n = 6L)
## # A tibble: 6 × 2
##   LotSize WorkHours
##     <dbl>     <dbl>
## 1      80       399
## 2      30       121
## 3      50       221
## 4      90       376
## 5      70       361
## 6      60       224

Eksplorasi Data

# Per-column summary statistics (min, quartiles, mean, max).
summary(object = toluca)
##     LotSize      WorkHours    
##  Min.   : 20   Min.   :113.0  
##  1st Qu.: 50   1st Qu.:224.0  
##  Median : 70   Median :342.0  
##  Mean   : 70   Mean   :312.3  
##  3rd Qu.: 90   3rd Qu.:389.0  
##  Max.   :120   Max.   :546.0
# Scatter plot of WorkHours against LotSize, drawn with filled circles.
plot(
  x = toluca$LotSize, y = toluca$WorkHours,
  pch = 16, xlab = "LotSize", ylab = "WorkHours"
)

Persamaan Regresi

Persamaan Regresi menggunakan Metode Kuadrat Terkecil (MKT) dengan Rumus

# Least-squares estimates computed by hand; the intermediate quantities are
# stored as extra columns of `toluca`.
# Deviations of each variable from its own sample mean.
toluca$xdif <- with(toluca, LotSize - mean(LotSize))
toluca$ydif <- with(toluca, WorkHours - mean(WorkHours))
# Cross products and squared deviations.
toluca$crp <- with(toluca, xdif * ydif)
toluca$xsq <- with(toluca, xdif^2)
toluca$ysq <- with(toluca, ydif^2)
# Slope: sum of cross products divided by sum of squared x deviations.
b1 <- with(toluca, sum(crp) / sum(xsq))
b1
## [1] 3.570202
# Intercept: mean response minus slope times mean predictor.
b0 <- with(toluca, mean(WorkHours) - b1 * mean(LotSize))
b0
## [1] 62.36586

Persamaan Regresi menggunakan fungsi lm pada R

Estimasi parameter model regresi juga dapat diperoleh dengan menggunakan fungsi lm sebagai berikut,

# Fit the simple linear regression of WorkHours on LotSize.
model2 <- lm(formula = WorkHours ~ LotSize, data = toluca)
# Print the coefficient table, residual standard error, R-squared and F test.
summary(object = model2)
## 
## Call:
## lm(formula = WorkHours ~ LotSize, data = toluca)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -83.876 -34.088  -5.982  38.826 103.528 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   62.366     26.177   2.382   0.0259 *  
## LotSize        3.570      0.347  10.290 4.45e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.82 on 23 degrees of freedom
## Multiple R-squared:  0.8215, Adjusted R-squared:  0.8138 
## F-statistic: 105.9 on 1 and 23 DF,  p-value: 4.449e-10

Persamaan Regresi dapat dituliskan sebagai berikut:

\(\hat{y}=b_{0}+b_{1}X\)

\(\hat{y}=62.366+3.570X\)

# Scatter plot of the data with the fitted regression line overlaid in red.
plot(
  x = toluca$LotSize, y = toluca$WorkHours,
  xlab = "LotSize", ylab = "WorkHours", pch = 16
)
abline(reg = model2, col = "red")

Fitted Value

# Fitted values stored in the lm object, one per observation.
fitted(model2)
##        1        2        3        4        5        6        7        8 
## 347.9820 169.4719 240.8760 383.6840 312.2800 276.5780 490.7901 347.9820 
##        9       10       11       12       13       14       15       16 
## 419.3861 240.8760 205.1739 312.2800 383.6840 133.7699 455.0881 419.3861 
##       17       18       19       20       21       22       23       24 
## 169.4719 240.8760 383.6840 455.0881 169.4719 383.6840 205.1739 347.9820 
##       25 
## 312.2800
# The same fitted values rebuilt from the hand-computed coefficients.
toluca_yhat <- b0 + b1 * toluca$LotSize
head(x = toluca_yhat)
## [1] 347.9820 169.4719 240.8760 383.6840 312.2800 276.5780

Residuals (Sisaan/Galat)

# Residuals stored in the lm object, one per observation.
residuals(model2)
##           1           2           3           4           5           6 
##  51.0179798 -48.4719192 -19.8759596  -7.6840404  48.7200000 -52.5779798 
##           7           8           9          10          11          12 
##  55.2098990   4.0179798 -66.3860606 -83.8759596 -45.1739394 -60.2800000 
##          13          14          15          16          17          18 
##   5.3159596 -20.7698990 -20.0880808   0.6139394  42.5280808  27.1240404 
##          19          20          21          22          23          24 
##  -6.6840404 -34.0880808 103.5280808  84.3159596  38.8260606  -5.9820202 
##          25 
##  10.7200000
# Residuals recomputed as observed response minus fitted value.
toluca_residual <- toluca$WorkHours - fitted(model2)
head(x = toluca_residual)
##         1         2         3         4         5         6 
##  51.01798 -48.47192 -19.87596  -7.68404  48.72000 -52.57798

Tabel ANOVA

# Error sum of squares (JKG = SSE): observed response minus fitted value.
JKG <- sum((toluca$WorkHours - toluca_yhat)^2)
JKG
## [1] 54825.46
# Regression sum of squares (JKR): fitted value minus mean response.
JKR <- sum((toluca_yhat - mean(toluca$WorkHours))^2)
JKR
## [1] 252377.6
# Total sum of squares (JKT): observed response minus mean response.
JKT <- sum((toluca$WorkHours - mean(toluca$WorkHours))^2)
JKT
## [1] 307203
# Mean squared error (MSE = KTG) from the residuals, with n - 2 in the
# denominator.
MSE <- sum(toluca_residual^2) / (nrow(toluca) - 2)
MSE
## [1] 2383.716
# The same quantity obtained from JKG.
KTG <- JKG / (nrow(toluca) - 2)
KTG
## [1] 2383.716
# ANOVA table from the fitted model, for comparison with the values above.
anova(object = model2)
## Analysis of Variance Table
## 
## Response: WorkHours
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## LotSize    1 252378  252378  105.88 4.449e-10 ***
## Residuals 23  54825    2384                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Uji Hipotesis \(\beta_{1}\) dan \(\beta_{0}\)

# t statistics for b0 and b1: each estimate divided by its standard error.
# Mean squared error (KTG): error sum of squares over n - 2.
KTG <- JKG / (length(toluca$LotSize) - 2)
KTG
## [1] 2383.716
# Estimated variance and standard error of the slope b1.
sb1sqr <- KTG / sum((toluca$LotSize - mean(toluca$LotSize))^2)
sb1 <- sqrt(sb1sqr)
tb1 <- b1 / sb1
tb1
## [1] 10.28959
# Estimated variance and standard error of the intercept b0.
sb0sqr <- KTG * (1 / length(toluca$LotSize) +
  ((mean(toluca$LotSize)^2) /
    sum((toluca$LotSize - mean(toluca$LotSize))^2)))
sb0 <- sqrt(sb0sqr)
tb0 <- b0 / sb0
tb0
## [1] 2.382428
# Two-sided critical value at alpha = 0.05, with degrees of freedom taken
# from the sample size (n - 2) instead of a hard-coded 23. The 0.975 quantile
# matches the two-sided p-values printed by summary(model2).
# NOTE(review): use qt(0.95, ...) instead if a one-sided test is intended.
ttabel <- qt(0.975, length(toluca$LotSize) - 2)

Selang Kepercayaan untuk \(\beta_{1}\) dan \(\beta_{0}\)

# 95% confidence intervals for the intercept and the slope of model2.
confint(object = model2, level = 0.95)
##                2.5 %     97.5 %
## (Intercept) 8.213711 116.518006
## LotSize     2.852435   4.287969

Selang Kepercayaan \(E\{Y_{h}\}\)

# Confidence interval for the mean response E{Yh} at a chosen lot size.
xh1 <- 65
# Point estimate at xh1 from the hand-computed coefficients.
yh1 <- b0 + b1 * xh1
# Estimated variance and standard error of the fitted mean at xh1.
syh1sqr <- KTG * (1 / length(toluca$LotSize) +
  (((xh1 - mean(toluca$LotSize))^2) /
    sum((toluca$LotSize - mean(toluca$LotSize))^2)))
syh1 <- sqrt(syh1sqr)
# 0.975 quantile of the t distribution on n - 2 degrees of freedom.
Ttabel <- qt(0.975, length(toluca$LotSize) - 2)

# Lower (yh1_B) and upper (yh1_A) limits of the interval.
yh1_B <- yh1 - Ttabel * syh1
yh1_A <- yh1 + Ttabel * syh1
yh1_B
## [1] 273.9129
yh1_A
## [1] 314.9451
# The same interval obtained from predict() on the fitted model.
predxc1 <- predict(
  model2,
  newdata = data.frame(LotSize = xh1),
  se.fit = FALSE, interval = "confidence", level = 0.95
)
predxc1
##       fit      lwr      upr
## 1 294.429 273.9129 314.9451
# Plot the data and the fitted line, then mark the fit and both limits at xh1.
plot(toluca$LotSize, toluca$WorkHours)
abline(model2, col = "blue")
points(xh1, predxc1[, "fit"], col = "red", pch = 15)
points(xh1, predxc1[, "lwr"], lty = "dotted", col = "red")
points(xh1, predxc1[, "upr"], lty = "dotted", col = "red")