Analisis Regresi dengan Matriks

Regresi Linier Sederhana dengan Matriks

Data

Data yang digunakan dalam praktikum ini adalah data perusahaan Toluca.

Perusahaan Toluca ingin memprediksi lama waktu kerja yang dibutuhkan (WorkHours) berdasarkan jumlah produksi yang dikerjakan (LotSize).

# Load the Toluca data from the Excel workbook and preview its first rows.
library(readxl)
toluca <- read_excel(path = "D:/toluca.xlsx")
head(x = toluca)
## # A tibble: 6 × 2
##   LotSize WorkHours
##     <dbl>     <dbl>
## 1      80       399
## 2      30       121
## 3      50       221
## 4      90       376
## 5      70       361
## 6      60       224

Eksplorasi Data

# Per-column descriptive statistics (min, quartiles, mean, max).
summary(object = toluca)
##     LotSize      WorkHours    
##  Min.   : 20   Min.   :113.0  
##  1st Qu.: 50   1st Qu.:224.0  
##  Median : 70   Median :342.0  
##  Mean   : 70   Mean   :312.3  
##  3rd Qu.: 90   3rd Qu.:389.0  
##  Max.   :120   Max.   :546.0
# Scatter plot of WorkHours against LotSize using solid points (pch = 16).
plot(
  x = toluca$LotSize,
  y = toluca$WorkHours,
  xlab = "LotSize",
  ylab = "WorkHours",
  pch = 16
)

Persamaan Regresi

# Fit the simple linear regression of WorkHours on LotSize and print the
# coefficient table together with the overall fit statistics.
model2 <- lm(formula = WorkHours ~ LotSize, data = toluca)
summary(object = model2)
## 
## Call:
## lm(formula = WorkHours ~ LotSize, data = toluca)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -83.876 -34.088  -5.982  38.826 103.528 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   62.366     26.177   2.382   0.0259 *  
## LotSize        3.570      0.347  10.290 4.45e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.82 on 23 degrees of freedom
## Multiple R-squared:  0.8215, Adjusted R-squared:  0.8138 
## F-statistic: 105.9 on 1 and 23 DF,  p-value: 4.449e-10

Persamaan regresi dapat dituliskan sebagai berikut:

\(\hat{y}=b_{0}+b_{1}X\)

\(\hat{y}=62.366+3.570X\)

ANOVA

# Analysis-of-variance table for the fitted simple regression.
anova(object = model2)
## Analysis of Variance Table
## 
## Response: WorkHours
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## LotSize    1 252378  252378  105.88 4.449e-10 ***
## Residuals 23  54825    2384                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interval Kepercayaan

# 95% confidence intervals for the intercept and the LotSize slope.
confint(object = model2, level = 0.95)
##                2.5 %     97.5 %
## (Intercept) 8.213711 116.518006
## LotSize     2.852435   4.287969

Persamaan Regresi dengan Matriks

# Least-squares estimates in matrix form: b = (X'X)^{-1} X'Y.
Y <- matrix(toluca$WorkHours) # response as a one-column matrix
N <- nrow(Y) # number of rows of Y
X <- cbind(matrix(1, nrow = N, ncol = 1), toluca$LotSize) # column of ones + LotSize
J <- matrix(1, nrow = N, ncol = N) # N x N matrix of ones
Xt <- t(X)
A <- Xt %*% X # X'X
A
##      [,1]   [,2]
## [1,]   25   1750
## [2,] 1750 142300
B <- solve(A) # inverse of X'X
B
##              [,1]          [,2]
## [1,]  0.287474747 -3.535354e-03
## [2,] -0.003535354  5.050505e-05
C <- Xt %*% Y # X'Y
C
##        [,1]
## [1,]   7807
## [2,] 617180
Beta <- B %*% C # coefficient vector: row 1 = intercept, row 2 = slope
Beta
##           [,1]
## [1,] 62.365859
## [2,]  3.570202

Dari perhitungan di atas diperoleh nilai b0 = 62.365859 dan b1 = 3.570202, sama dengan hasil yang diberikan fungsi lm() sebelumnya.

Inferensi

# Inference for the regression in matrix form. Uses Y, X, J, N and Beta from
# the matrix computation above.
# The number of estimated coefficients is taken from the design matrix
# instead of being typed in by hand, so the degrees of freedom always match X.
p <- ncol(X)

# Total sum of squares: Y'Y - (1/N) Y'JY
JKT <- (t(Y) %*% Y) - ((1 / N) * (t(Y) %*% J %*% Y))
JKT
##        [,1]
## [1,] 307203
# Error (residual) sum of squares: Y'Y - b'X'Y
JKG <- (t(Y) %*% Y) - (t(Beta) %*% t(X) %*% Y)
JKG
##          [,1]
## [1,] 54825.46
# Regression sum of squares, obtained as total minus error
JKR <- JKT - JKG
JKR
##          [,1]
## [1,] 252377.6
# F statistic: regression mean square over error mean square
fhitung <- (JKR / (p - 1)) / (JKG / (N - p))
fhitung
##          [,1]
## [1,] 105.8757
qf(1 - 0.01, p - 1, N - p) # F table value at alpha = 0.01
## [1] 7.881134
# Error mean square, converted from a 1 x 1 matrix to a plain number so it
# can scale the matrix below
KTG <- JKG / (N - p)
KTG <- as.numeric(KTG)
KTG
## [1] 2383.716
# Estimated variance-covariance matrix of the coefficients: KTG * (X'X)^{-1}
Sb <- KTG * solve(t(X) %*% X)
Sb
##            [,1]       [,2]
## [1,] 685.258045 -8.4272774
## [2,]  -8.427277  0.1203897
# t statistics: each coefficient divided by its standard error
thitung0 <- Beta[1, 1] / sqrt(Sb[1, 1])
thitung0
## [1] 2.382428
thitung1 <- Beta[2, 1] / sqrt(Sb[2, 2])
thitung1
## [1] 10.28959
qt(1 - 0.01 / 2, N - p) # two-sided t table value at alpha = 0.01
## [1] 2.807336

Regresi Linier Berganda dengan Matriks

Data

# Load the body-fat data from the Excel workbook and print it as a plain
# data frame.
library(readxl)
bodyf <- read_excel(path = "D:/data_bodyfat.xlsx")
print.data.frame(x = bodyf[, -1]) # show every column except the first; bodyf itself is unchanged
##    Triceps_X1 Thigh_X2 Midarm_X3 Bodyfat_Y
## 1        19.5     43.1      29.1      11.9
## 2        24.7     49.8      28.2      22.8
## 3        30.7     51.9      37.0      18.7
## 4        29.8     54.3      31.1      20.1
## 5        19.1     42.2      30.9      12.9
## 6        25.6     53.9      23.7      21.7
## 7        31.4     58.5      27.6      27.1
## 8        27.9     52.1      30.6      25.4
## 9        22.1     49.9      23.2      21.3
## 10       25.5     53.5      24.8      19.3
## 11       31.1     56.6      30.0      25.4
## 12       30.4     56.7      28.3      27.2
## 13       18.7     46.5      23.0      11.7
## 14       19.7     44.2      28.6      17.8
## 15       14.6     42.7      21.3      12.8
## 16       29.5     54.4      30.1      23.9
## 17       27.7     55.3      25.7      22.6
## 18       30.2     58.6      24.6      25.4
## 19       22.7     48.2      27.1      14.8
## 20       25.2     51.0      27.5      21.1

dengan,

X1: Triceps Skinfold Thickness

X2: Thigh Circumference

X3: Midarm Circumference

Y: Body Fat

Persamaan Regresi

# Fit the multiple linear regression of body fat on the three body
# measurements and print the coefficient table and fit statistics.
modela <- lm(
  Bodyfat_Y ~ Triceps_X1 + Thigh_X2 + Midarm_X3,
  data = bodyf
)
summary(object = modela)
## 
## Call:
## lm(formula = Bodyfat_Y ~ Triceps_X1 + Thigh_X2 + Midarm_X3, data = bodyf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7263 -1.6111  0.3923  1.4656  4.1277 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  117.085     99.782   1.173    0.258
## Triceps_X1     4.334      3.016   1.437    0.170
## Thigh_X2      -2.857      2.582  -1.106    0.285
## Midarm_X3     -2.186      1.595  -1.370    0.190
## 
## Residual standard error: 2.48 on 16 degrees of freedom
## Multiple R-squared:  0.8014, Adjusted R-squared:  0.7641 
## F-statistic: 21.52 on 3 and 16 DF,  p-value: 7.343e-06

ANOVA

# Sequential analysis-of-variance table for the fitted multiple regression.
anova(object = modela)
## Analysis of Variance Table
## 
## Response: Bodyfat_Y
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Triceps_X1  1 352.27  352.27 57.2768 1.131e-06 ***
## Thigh_X2    1  33.17   33.17  5.3931   0.03373 *  
## Midarm_X3   1  11.55   11.55  1.8773   0.18956    
## Residuals  16  98.40    6.15                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interval Kepercayaan

# 95% confidence intervals for the intercept and the three slopes.
confint(object = modela, level = 0.95)
##                  2.5 %     97.5 %
## (Intercept) -94.444550 328.613940
## Triceps_X1   -2.058507  10.726691
## Thigh_X2     -8.330476   2.616780
## Midarm_X3    -5.568367   1.196247

Persamaan Regresi dengan Matriks

# Least-squares estimates in matrix form: b = (X'X)^{-1} X'Y.
Y <- matrix(bodyf$Bodyfat_Y) # response as a one-column matrix
N <- nrow(Y) # number of rows of Y
X <- cbind(
  matrix(1, nrow = N, ncol = 1), # column of ones for the intercept
  bodyf$Triceps_X1,
  bodyf$Thigh_X2,
  bodyf$Midarm_X3
)
J <- matrix(1, nrow = N, ncol = N) # N x N matrix of ones
Xt <- t(X)
A <- Xt %*% X # X'X
A
##        [,1]     [,2]     [,3]     [,4]
## [1,]   20.0   506.10  1023.40   552.40
## [2,]  506.1 13286.29 26358.69 14137.83
## [3,] 1023.4 26358.69 52888.00 28297.02
## [4,]  552.4 14137.83 28297.02 15510.02
B <- solve(A) # inverse of X'X
B
##            [,1]       [,2]        [,3]        [,4]
## [1,] 1618.86721 48.8102522 -41.8487041 -25.7987855
## [2,]   48.81025  1.4785133  -1.2648388  -0.7785022
## [3,]  -41.84870 -1.2648388   1.0839791   0.6657581
## [4,]  -25.79879 -0.7785022   0.6657581   0.4139009
C <- Xt %*% Y # X'Y
C
##          [,1]
## [1,]   403.90
## [2,] 10631.65
## [3,] 21113.50
## [4,] 11206.12
Beta <- B %*% C # coefficient vector: row 1 = intercept, rows 2-4 = slopes
Beta
##            [,1]
## [1,] 117.084695
## [2,]   4.334092
## [3,]  -2.856848
## [4,]  -2.186060

Inferensi

# Inference for the regression in matrix form. Uses Y, X, J, N and Beta from
# the matrix computation above.
# The number of estimated coefficients is taken from the design matrix
# instead of being typed in by hand, so the degrees of freedom always match X.
p <- ncol(X)

# Total sum of squares: Y'Y - (1/N) Y'JY
JKT <- (t(Y) %*% Y) - ((1 / N) * (t(Y) %*% J %*% Y))
JKT
##          [,1]
## [1,] 495.3895
# Error (residual) sum of squares: Y'Y - b'X'Y
JKG <- (t(Y) %*% Y) - (t(Beta) %*% t(X) %*% Y)
JKG
##          [,1]
## [1,] 98.40489
# Regression sum of squares, obtained as total minus error
JKR <- JKT - JKG
JKR
##          [,1]
## [1,] 396.9846
# F statistic: regression mean square over error mean square
fhitung <- (JKR / (p - 1)) / (JKG / (N - p))
fhitung
##          [,1]
## [1,] 21.51571
qf(0.99, p - 1, N - p) # F table value at alpha = 1%
## [1] 5.292214
# Error mean square, converted from a 1 x 1 matrix to a plain number so it
# can scale the matrix below
KTG <- JKG / (N - p)
KTG <- as.numeric(KTG)
KTG
## [1] 6.150306
# Estimated variance-covariance matrix of the coefficients: KTG * (X'X)^{-1}
Sb <- KTG * solve(t(X) %*% X)
Sb
##           [,1]       [,2]        [,3]        [,4]
## [1,] 9956.5279 300.197962 -257.382315 -158.670413
## [2,]  300.1980   9.093309   -7.779145   -4.788026
## [3,] -257.3823  -7.779145    6.666803    4.094615
## [4,] -158.6704  -4.788026    4.094615    2.545617
# t statistics: each coefficient divided by its standard error
thitung0 <- Beta[1, 1] / sqrt(Sb[1, 1])
thitung0
## [1] 1.1734
thitung1 <- Beta[2, 1] / sqrt(Sb[2, 2])
thitung1
## [1] 1.437266