Multiple Linear Regression Analysis Assignment

Group 9 members:
Deden Ahmad Rabani (G1401221016)
Nabil Bintang Prayoga (G1401221017)
Fathiyya Mufida (G1401220931)

Data Frame

library(readxl)
data<-read_xlsx("D:\\KULIAHH\\SEMESTER 4\\ANREG\\data ppt.xlsx")
n <- 30
p <- 6

Y <- data$Y
X0 <- rep(1, n)
X1 <- data$X1
X2 <- data$X2
X3 <- data$X3
X4 <- data$X4
X5 <- data$X5
X6 <- data$X6

dt <- data.frame(Y, X1, X2, X3, X4, X5, X6)
dt
##     Y X1 X2 X3 X4 X5 X6
## 1  43 51 30 39 61 92 45
## 2  63 64 51 54 63 73 47
## 3  71 70 68 69 76 86 48
## 4  61 63 45 47 54 84 35
## 5  81 78 56 66 71 83 47
## 6  43 55 49 44 54 49 34
## 7  58 67 42 56 66 68 35
## 8  71 75 50 55 70 66 41
## 9  72 82 72 67 71 83 31
## 10 67 61 45 47 62 80 41
## 11 64 53 53 58 58 67 34
## 12 67 60 47 39 59 74 41
## 13 69 62 57 42 55 63 25
## 14 68 83 83 45 59 77 35
## 15 77 77 54 72 79 77 46
## 16 81 90 50 72 60 54 36
## 17 74 85 64 69 79 79 63
## 18 65 60 65 75 55 80 60
## 19 65 70 46 57 75 85 46
## 20 50 58 68 54 64 78 52
## 21 50 40 33 34 43 64 33
## 22 64 61 52 62 66 80 41
## 23 53 66 52 50 63 80 37
## 24 40 37 42 58 50 57 49
## 25 63 54 42 48 66 75 33
## 26 66 77 66 63 88 76 72
## 27 78 75 58 74 80 78 49
## 28 48 57 44 45 51 83 38
## 29 85 85 71 71 77 74 55
## 30 82 82 39 59 64 78 39
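
A quick structural summary of the data can complement the printed table (a minimal sketch using base R only):

str(dt)       # variable types and dimensions
summary(dt)   # basic descriptive statistics for each column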

Data Exploration

plot(X1, Y, pch=19)

plot(X2, Y, pch=19)

plot(X3, Y, pch=19)

plot(X4, Y, pch=19)

plot(X5, Y, pch=19)

plot(X6, Y, pch=19)
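
As an alternative to the six separate scatterplots, a scatterplot matrix shows all pairwise relationships at once (a minimal sketch using the dt data frame built above):

pairs(dt, pch = 19)   # pairwise scatterplots of Y and X1 to X6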

Model Building Without Built-in Functions (Matrix Approach)

Y <- as.matrix(data$Y)
X <- as.matrix(cbind(X0,X1,X2,X3,X4,X5,X6))
b <- solve(t(X) %*% X) %*% t(X) %*% Y; round(b, 4)
##       [,1]
## X0 10.7871
## X1  0.6132
## X2 -0.0731
## X3  0.3203
## X4  0.0817
## X5  0.0384
## X6 -0.2171
(b0<-b[1])
## [1] 10.78708
(b1<-b[2])
## [1] 0.6131876
(b2<-b[3])
## [1] -0.07305014
(b3<-b[4])
## [1] 0.3203321
(b4<-b[5])
## [1] 0.08173213
(b5<-b[6])
## [1] 0.03838145
(b6<-b[7])
## [1] -0.2170567
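
As a cross-check, the same least-squares estimates can be obtained through a QR decomposition, which avoids explicitly inverting t(X) %*% X (a minimal sketch reusing the X and Y matrices defined above):

b_qr <- qr.solve(X, Y)   # least-squares solution via QR decomposition
round(b_qr, 4)           # should reproduce the estimates above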

Multiple Linear Regression Model

\[ \hat Y = 10.78708 + 0.6131876X_1 - 0.07305014X_2 + 0.3203321X_3 + 0.08173213X_4 + 0.03838145X_5 - 0.2170567X_6 \]

Coefficient of Determination and Adjusted R-squared

(df<-(n-p))
## [1] 24
sigma_kuadrat <- (t(Y)%*%Y-t(b)%*%t(X)%*%Y)/df
(Res_se <- sqrt(sigma_kuadrat))
##          [,1]
## [1,] 6.919177
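
Note that lm() (fitted later in this document) divides the residual sum of squares by n - p - 1 = 23, because the model contains an intercept plus p = 6 predictors; this is why its residual standard error (7.068) is slightly larger than the 6.919 obtained here with n - p = 24. A minimal sketch of that alternative calculation:

JKG_cek <- sum((Y - X %*% b)^2)   # residual sum of squares
sqrt(JKG_cek / (n - p - 1))       # about 7.068, matching lm()'s residual standard error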
(yduga <- b0+b1*X1+b2*X2+b3*X3+b4*X4+b5*X5+b6*X6)
##  [1] 51.11030 61.35277 69.93944 61.22684 74.45380 53.94185 67.14841 70.09701
##  [9] 79.53099 59.19846 57.92572 55.40103 59.58168 70.21401 76.54933 84.54785
## [17] 76.15013 61.39736 68.01656 55.62014 42.60324 63.81902 63.66400 44.62475
## [25] 57.31710 67.84347 75.14036 56.04535 77.66053 76.87850
(y <- data.frame(Y,yduga))
##     Y    yduga
## 1  43 51.11030
## 2  63 61.35277
## 3  71 69.93944
## 4  61 61.22684
## 5  81 74.45380
## 6  43 53.94185
## 7  58 67.14841
## 8  71 70.09701
## 9  72 79.53099
## 10 67 59.19846
## 11 64 57.92572
## 12 67 55.40103
## 13 69 59.58168
## 14 68 70.21401
## 15 77 76.54933
## 16 81 84.54785
## 17 74 76.15013
## 18 65 61.39736
## 19 65 68.01656
## 20 50 55.62014
## 21 50 42.60324
## 22 64 63.81902
## 23 53 63.66400
## 24 40 44.62475
## 25 63 57.31710
## 26 66 67.84347
## 27 78 75.14036
## 28 48 56.04535
## 29 85 77.66053
## 30 82 76.87850
(R_squared <- (cor(Y,yduga))^2)
##          [,1]
## [1,] 0.732602
(adj_R_squared <- 1-((1-R_squared)*(n-1)/(n-p-1)))
##          [,1]
## [1,] 0.662846

These results show that the variation in X1, X2, X3, X4, X5, and X6 explains about 66.28% of the variation in Y (adjusted R-squared = 0.662846).
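
The same R-squared can be computed equivalently as one minus the ratio of the residual sum of squares to the total sum of squares (a minimal sketch; the residuals are recomputed from the matrix fit above):

sisaan <- Y - X %*% b                        # residuals from the matrix fit
1 - sum(sisaan^2) / sum((Y - mean(Y))^2)     # equals the R-squared above (about 0.7326)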

F Test and Standard Errors of the Regression Parameters

galat <- Y-(b0+b1*X1+b2*X2+b3*X3+b4*X4+b5*X5+b6*X6)
dbg <- n-p-1
JKG <- sum(galat^2)
KTG   <- JKG/dbg

dbreg <- p
JKReg <- sum((yduga-mean(Y))^2)
KTReg <- JKReg/dbreg

dbt <- n-1
JKT <- JKReg+JKG

(Fhit  <- KTReg/KTG)
## [1] 10.50235
p_value <- pf(Fhit, dbreg, dbg, lower.tail = FALSE)
# sqrt() of the whole covariance matrix produced a NaN warning (its off-diagonal
# entries can be negative); only the diagonal is needed for the standard errors
var_b <- sigma_kuadrat[1] * solve(t(X) %*% X)
se_b  <- sqrt(diag(var_b))
(se_b0 <- se_b[[1]])
## [1] 11.34525
(se_b1 <- se_b[[2]])
## [1] 0.1575936
(se_b2 <- se_b[[3]])
## [1] 0.132867
(se_b3 <- se_b[[4]])
## [1] 0.1649721
(se_b4 <- se_b[[5]])
## [1] 0.2168145
(se_b5 <- se_b[[6]])
## [1] 0.1439005
(se_b6 <- se_b[[7]])
## [1] 0.1744573

Hypothesis Test

A hypothesis test to determine which explanatory variables affect the response variable.

\[ H_0: \beta_1 = \beta_2 = \beta_3 = \beta_4 = \beta_5 = \beta_6 = 0\\ H_1: \beta_j \neq 0 \ \text{for at least one } j,\ j = 1, 2, 3, 4, 5, 6 \]

Analysis of Variance Table

SK <- c("Regresi", "Residual", "Total")
db <- c(dbreg, dbg, dbt)
JK <- c(JKReg, JKG, JKT)
KT <- c(KTReg, KTG, NA)
Fhit <- c(Fhit, NA, NA)
pvalue <- c(p_value, NA, NA)
(Anova <- data.frame(SK, db, JK, KT, Fhit, pvalue))
##         SK db       JK        KT     Fhit       pvalue
## 1  Regresi  6 3147.966 524.66106 10.50235 1.240412e-05
## 2 Residual 23 1149.000  49.95654       NA           NA
## 3    Total 29 4296.967        NA       NA           NA

The p-value of \(1.240412 \times 10^{-5}\) is less than 0.05, so H0 is rejected: at least one explanatory variable has a significant effect on the response.
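
As a sanity check on the decomposition in the ANOVA table, the regression and residual sums of squares should add up to the total sum of squares of Y (a minimal sketch reusing the objects computed above):

all.equal(JKT, sum((Y - mean(Y))^2))   # TRUE up to floating-point tolerance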

Parameter Significance

t-value

(t_b0 <- b0/se_b0)
## [1] 0.9508015
(t_b1 <- b1/se_b1)
## [1] 3.890942
(t_b2 <- b2/se_b2)
## [1] -0.549799
(t_b3 <- b3/se_b3)
## [1] 1.941735
(t_b4 <- b4/se_b4)
## [1] 0.3769681
(t_b5 <- b5/se_b5)
## [1] 0.2667222
(t_b6 <- b6/se_b6)
## [1] -1.244182

p-value

2*pt(-abs(t_b0 ),df)
## [1] 0.3511828
2*pt(-abs(t_b1 ),df)
## [1] 0.0006937845
2*pt(-abs(t_b2 ),df)
## [1] 0.5875377
2*pt(-abs(t_b3 ),df)
## [1] 0.06399683
2*pt(-abs(t_b4 ),df)
## [1] 0.70951
2*pt(-abs(t_b5 ),df)
## [1] 0.7919614
2*pt(-abs(t_b6 ),df)
## [1] 0.225447
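
The same t statistics and two-sided p-values can be computed in one vectorized step (a minimal sketch reusing the coefficient vector b, the standard errors, and df = n - p from above):

se_semua <- c(se_b0, se_b1, se_b2, se_b3, se_b4, se_b5, se_b6)
t_semua  <- as.vector(b) / se_semua
p_semua  <- 2 * pt(-abs(t_semua), df)
round(data.frame(t = t_semua, p = p_semua), 4)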

(1 - α) × 100% Confidence Intervals

t <- qt(.975, df)   # df = n - p, as defined above

BB_b0 <- b0-t*se_b0
BA_b0 <- b0+t*se_b0

BB_b1 <- b1-t*se_b1
BA_b1 <- b1+t*se_b1

BB_b2 <- b2-t*se_b2
BA_b2 <- b2+t*se_b2

BB_b3 <- b3-t*se_b3
BA_b3 <- b3+t*se_b3

BB_b4 <- b4-t*se_b4
BA_b4 <- b4+t*se_b4

BB_b5 <- b5-t*se_b5
BA_b5 <- b5+t*se_b5

BB_b6 <- b6-t*se_b6
BA_b6 <- b6+t*se_b6

Batas.Bawah <- as.matrix(c(round(BB_b0,6),round(BB_b1,6),round(BB_b2,6),round(BB_b3,6),round(BB_b4,6),round(BB_b5,6),round(BB_b6,6)))
Batas.Atas <- as.matrix(c(round(BA_b0,6),round(BA_b1,6),round(BA_b2,6),round(BA_b3,6),round(BA_b4,6),round(BA_b5,6),round(BA_b6,6)))

Selang.Kepercayaan <- cbind(Batas.Bawah, Batas.Atas)
colnames(Selang.Kepercayaan ) <- c("Batas bawah Selang (2.5%)", "Batas atas Selang (97.5%)")
rownames(Selang.Kepercayaan) <- c("Intercept", "b1", "b2", "b3", "b4", "b5", "b6")
Selang.Kepercayaan
##           Batas bawah Selang (2.5%) Batas atas Selang (97.5%)
## Intercept                -12.628360                 34.202512
## b1                         0.287930                  0.938445
## b2                        -0.347274                  0.201174
## b3                        -0.020154                  0.660818
## b4                        -0.365751                  0.529215
## b5                        -0.258614                  0.335377
## b6                        -0.577119                  0.143005
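
For comparison, confint() on an lm() fit gives very similar intervals; the small differences arise because lm() uses n - p - 1 = 23 residual degrees of freedom instead of the n - p = 24 used above (a minimal sketch, refitting the same full model as in the next section):

confint(lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data), level = 0.95)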

Model Building with the lm Function

regresi <- lm(Y~X1+X2+X3+X4+X5+X6, data=data)
summary(regresi)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.9418  -4.3555   0.3158   5.5425  11.5990 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.78708   11.58926   0.931 0.361634    
## X1           0.61319    0.16098   3.809 0.000903 ***
## X2          -0.07305    0.13572  -0.538 0.595594    
## X3           0.32033    0.16852   1.901 0.069925 .  
## X4           0.08173    0.22148   0.369 0.715480    
## X5           0.03838    0.14700   0.261 0.796334    
## X6          -0.21706    0.17821  -1.218 0.235577    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.068 on 23 degrees of freedom
## Multiple R-squared:  0.7326, Adjusted R-squared:  0.6628 
## F-statistic:  10.5 on 6 and 23 DF,  p-value: 1.24e-05
anova(regresi)
## Analysis of Variance Table
## 
## Response: Y
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## X1         1 2927.58 2927.58 58.6026 9.056e-08 ***
## X2         1    7.52    7.52  0.1505    0.7016    
## X3         1  137.25  137.25  2.7473    0.1110    
## X4         1    0.94    0.94  0.0189    0.8920    
## X5         1    0.56    0.56  0.0113    0.9163    
## X6         1   74.11   74.11  1.4835    0.2356    
## Residuals 23 1149.00   49.96                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
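
The coefficient table from summary() can also be extracted as a numeric matrix, which makes it easy to compare with the manual calculations above (a minimal sketch):

coef(summary(regresi))   # estimates, standard errors, t values, and p-values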

Correlation Among Variables

cor(data)
##            Y        X1        X2        X3        X4        X5        X6
## Y  1.0000000 0.8254176 0.4261169 0.6236782 0.5901390 0.1564392 0.1550863
## X1 0.8254176 1.0000000 0.5582882 0.5967358 0.6691975 0.1877143 0.2245796
## X2 0.4261169 0.5582882 1.0000000 0.4933310 0.4454779 0.1472331 0.3432934
## X3 0.6236782 0.5967358 0.4933310 1.0000000 0.6403144 0.1159652 0.5316198
## X4 0.5901390 0.6691975 0.4454779 0.6403144 1.0000000 0.3768830 0.5741862
## X5 0.1564392 0.1877143 0.1472331 0.1159652 0.3768830 1.0000000 0.2833432
## X6 0.1550863 0.2245796 0.3432934 0.5316198 0.5741862 0.2833432 1.0000000

Based on the correlation matrix, X1, X3, and X4 have correlations with Y above 0.5, and the full model's adjusted R-squared is about 66.28%. However, other combinations of two up to five variables should still be fitted and their adjusted R-squared values compared in order to find the best model. Because only three of the X variables have correlations with Y above 0.5, the best model is expected to come from a combination of three variables.

Variables X1, X3, X4

reg134 <- lm(Y~X1+X3+X4, data=data)
summary(reg134)
## 
## Call:
## lm(formula = Y ~ X1 + X3 + X4, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.6282  -5.8107   0.5115   6.3946  10.3509 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.52260    8.30481   1.267    0.216    
## X1           0.65349    0.13637   4.792 5.82e-05 ***
## X3           0.22069    0.14967   1.475    0.152    
## X4          -0.02864    0.18245  -0.157    0.876    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.943 on 26 degrees of freedom
## Multiple R-squared:  0.7083, Adjusted R-squared:  0.6746 
## F-statistic: 21.04 on 3 and 26 DF,  p-value: 3.957e-07

Variables X1, X2, X3

reg123 <- lm(Y~X1+X2+X3, data=data)
summary(reg123)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2012  -5.7478   0.5599   5.8226  11.3241 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.2583     7.3183   1.538   0.1360    
## X1            0.6824     0.1288   5.296 1.54e-05 ***
## X2           -0.1033     0.1293  -0.799   0.4318    
## X3            0.2380     0.1394   1.707   0.0997 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.863 on 26 degrees of freedom
## Multiple R-squared:  0.715,  Adjusted R-squared:  0.6821 
## F-statistic: 21.74 on 3 and 26 DF,  p-value: 2.936e-07

Variables X1, X2, X4

reg124 <- lm(Y~X1+X2+X4, data=data)
summary(reg124)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X4, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.3676  -6.0893  -0.0922   6.2167   9.9845 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.82308    8.76216   1.463    0.155    
## X1           0.73790    0.14685   5.025 3.15e-05 ***
## X2          -0.05805    0.13264  -0.438    0.665    
## X4           0.08898    0.17427   0.511    0.614    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.201 on 26 degrees of freedom
## Multiple R-squared:  0.6862, Adjusted R-squared:   0.65 
## F-statistic: 18.95 on 3 and 26 DF,  p-value: 1.007e-06

Variables X1, X3, X6

reg136 <- lm(Y~X1+X3+X6, data=data)
summary(reg136)
## 
## Call:
## lm(formula = Y ~ X1 + X3 + X6, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.217  -5.377   0.967   6.078  11.540 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  13.5777     7.5439   1.800   0.0835 .  
## X1            0.6227     0.1181   5.271 1.65e-05 ***
## X3            0.3124     0.1542   2.026   0.0532 .  
## X6           -0.1870     0.1449  -1.291   0.2082    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.734 on 26 degrees of freedom
## Multiple R-squared:  0.7256, Adjusted R-squared:  0.6939 
## F-statistic: 22.92 on 3 and 26 DF,  p-value: 1.807e-07

The results above show only a few of the three-variable combinations. If all combinations are fitted (a sketch that does so follows below), the combination of X1, X3, and X6 yields the best model, with an adjusted R-squared of 0.6939; in other words, X1, X3, and X6 explain about 69.39% of the variation in Y.
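
A minimal sketch that fits every three-predictor combination and sorts the results by adjusted R-squared (the predictor names X1 to X6 are assumed to match the columns of the data loaded above):

peubah  <- paste0("X", 1:6)
subset3 <- combn(peubah, 3)              # all 20 three-variable combinations
adj_r2  <- apply(subset3, 2, function(v) {
  summary(lm(reformulate(v, response = "Y"), data = data))$adj.r.squared
})
hasil <- data.frame(model  = apply(subset3, 2, paste, collapse = " + "),
                    adj_R2 = round(adj_r2, 4))
hasil[order(-hasil$adj_R2), ]            # X1 + X3 + X6 should come out on top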

library(olsrr)
## Warning: package 'olsrr' was built under R version 4.3.2
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
ols_step_best_subset(regresi)
##     Best Subsets Regression     
## --------------------------------
## Model Index    Predictors
## --------------------------------
##      1         X1                
##      2         X1 X3             
##      3         X1 X3 X6          
##      4         X1 X2 X3 X6       
##      5         X1 X2 X3 X4 X6    
##      6         X1 X2 X3 X4 X5 X6 
## --------------------------------
## 
##                                                     Subsets Regression Summary                                                     
## -----------------------------------------------------------------------------------------------------------------------------------
##                        Adj.        Pred                                                                                             
## Model    R-Square    R-Square    R-Square     C(p)       AIC         SBIC        SBC         MSEP         FPE       HSP       APC  
## -----------------------------------------------------------------------------------------------------------------------------------
##   1        0.6813      0.6699      0.6379    1.4115    205.7638    120.9874    209.9674    1467.4370    52.1669    1.8114    0.3642 
##   2        0.7080      0.6864      0.6402    1.1148    205.1387    121.0938    210.7435    1396.1991    51.1153    1.7872    0.3569 
##   3        0.7256      0.6939       0.642    1.6027    205.2758    122.1609    212.2818    1364.6223    51.3971    1.8140    0.3588 
##   4        0.7293      0.6860      0.6211    3.2805    206.8634    124.4468    215.2706    1402.0751    54.2739    1.9384    0.3789 
##   5        0.7318      0.6759      0.5995    5.0682    208.5886    126.8776    218.3970    1449.6936    57.6203    2.0877    0.4023 
##   6        0.7326      0.6628      0.5471    7.0000    210.4998    129.4391    221.7094    1511.1095    61.6131    2.2708    0.4302 
## -----------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria 
##  SBIC: Sawa's Bayesian Information Criteria 
##  SBC: Schwarz Bayesian Criteria 
##  MSEP: Estimated error of prediction, assuming multivariate normality 
##  FPE: Final Prediction Error 
##  HSP: Hocking's Sp 
##  APC: Amemiya Prediction Criteria

This best combination can be confirmed with olsrr's ols_step_best_subset() function, which reports the best subset of each size together with its adjusted R-squared. It gives the same result: the combination of X1, X3, and X6 has the largest adjusted R-squared.

Best Model Based on Variables X1, X3, and X6

Y <- as.matrix(data$Y)
X <- as.matrix(cbind(X0, X1, X3, X6))
b <- solve(t(X) %*% X) %*% t(X) %*% Y

(b0 <- b[1])
## [1] 13.57774
(b1 <- b[2])
## [1] 0.6227297
(b3 <- b[3])
## [1] 0.312387
(b6 <- b[4])
## [1] -0.1869508
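
As a check, these estimates match the coefficients of the reg136 fit from the subset-selection step above:

coef(reg136)   # should equal b0, b1, b3, and b6 above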

Multiple Linear Regression Model

\[ \hat Y = 13.57774 + 0.6227297X_1 + 0.312387X_3 - 0.1869508X_6 \]