#input data
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
Data <- read_xlsx("C:/Users/arantxa/Downloads/Data Anreg.xlsx")
Data
## # A tibble: 30 × 8
##      Row     Y    X1    X2    X3    X4    X5    X6
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     1    43    51    30    39    61    92    45
##  2     2    63    64    51    54    63    73    47
##  3     3    71    70    68    69    76    86    48
##  4     4    61    63    45    47    54    84    35
##  5     5    81    78    56    66    71    83    47
##  6     6    43    55    49    44    54    49    34
##  7     7    58    67    42    56    66    68    35
##  8     8    71    75    50    55    70    66    41
##  9     9    72    82    72    67    71    83    31
## 10    10    67    61    45    47    62    80    41
## # ℹ 20 more rows
n <- nrow(Data)
p <- 6
y <- as.numeric(Data$Y)
x0 <- rep(1,30)
x1 <- as.numeric(Data$X1)
x2 <- as.numeric(Data$X2)
x3 <- as.numeric(Data$X3)
x4 <- as.numeric(Data$X4)
x5 <- as.numeric(Data$X5)
x6 <- as.numeric(Data$X6)

Data <- data.frame(cbind(y,x0,x1,x2,x3,x4,x5,x6))
head(Data)
##    y x0 x1 x2 x3 x4 x5 x6
## 1 43  1 51 30 39 61 92 45
## 2 63  1 64 51 54 63 73 47
## 3 71  1 70 68 69 76 86 48
## 4 61  1 63 45 47 54 84 35
## 5 81  1 78 56 66 71 83 47
## 6 43  1 55 49 44 54 49 34
#pembentukan model tanpa fungsi bawaan
#parameter regresi#
y <- as.matrix(y)
x <- as.matrix(cbind(x0,x1,x2,x3,x4,x5,x6))
b <- solve(t(x)%*%x)%*%t(x)%*%y;round(b,4)
##       [,1]
## x0 10.7871
## x1  0.6132
## x2 -0.0731
## x3  0.3203
## x4  0.0817
## x5  0.0384
## x6 -0.2171
b0 <- b[1];b0
## [1] 10.78708
b1 <- b[2];b1
## [1] 0.6131876
b2 <- b[3];b2
## [1] -0.07305014
b3 <- b[4];b3
## [1] 0.3203321
b4 <- b[5];b4
## [1] 0.08173213
b5 <- b[6];b5
## [1] 0.03838145
b6 <- b[7];b6
## [1] -0.2170567
#koefisiensi determinasi
sigma_kuadrat <- (t(y)%*%y-t(b)%*%t(x)%*%y)/(n-p)
Res_se <- sqrt(sigma_kuadrat)
round(Res_se,3)
##       [,1]
## [1,] 6.919
df <- n-p
df
## [1] 24
y_duga <- b0+b1*x1+b2*x2+b3*x3+b4*x4+b5*x5+b6*x6
Y <- data.frame(y, y_duga)
Y
##     y   y_duga
## 1  43 51.11030
## 2  63 61.35277
## 3  71 69.93944
## 4  61 61.22684
## 5  81 74.45380
## 6  43 53.94185
## 7  58 67.14841
## 8  71 70.09701
## 9  72 79.53099
## 10 67 59.19846
## 11 64 57.92572
## 12 67 55.40103
## 13 69 59.58168
## 14 68 70.21401
## 15 77 76.54933
## 16 81 84.54785
## 17 74 76.15013
## 18 65 61.39736
## 19 65 68.01656
## 20 50 55.62014
## 21 50 42.60324
## 22 64 63.81902
## 23 53 63.66400
## 24 40 44.62475
## 25 63 57.31710
## 26 66 67.84347
## 27 78 75.14036
## 28 48 56.04535
## 29 85 77.66053
## 30 82 76.87850
R_squared <- (cor(y, y_duga))^2;
round(R_squared, 4)
##        [,1]
## [1,] 0.7326
R_squared_adj <- 1-((1-R_squared)*(n-1)/(n-p));
round(R_squared_adj,4)
##        [,1]
## [1,] 0.6769
#uji F dan std. error parameter regresi
KTReg <- sum((y_duga-mean(y))^2)/(p)
galat <- y-(b0+b1*x1+b2*x2+b3*x3+b4*x4+b5*x5+b6*x6)
KTG <- sum(galat^2)/(n-p-1)
Fhit <- KTReg/KTG;round(Fhit,0)
## [1] 11
dbreg <- p
dbreg
## [1] 6
dbg <- n - 7
dbg
## [1] 23
dbt <-n-1
dbt
## [1] 29
JKreg <-sum((y_duga-mean(y))^2)
JKreg
## [1] 3147.966
JKG <-sum(galat^2)
JKG
## [1] 1149
JKT <- JKG + JKreg 
JKT
## [1] 4296.967
p_value <- pf(Fhit, dbreg, dbg, lower.tail = F)
p_value
## [1] 1.240412e-05
#uji kelayakan model
SK <- c("Regresi", "Residual", "Total") 
db <- c(dbreg, dbg, dbt) 
JK <- c(JKreg, JKG, JKT) 
KT <- c(KTReg, KTG, NA) 
F_hitung <- c(Fhit, NA, NA) 
P_value <- c(p_value, NA, NA) 
TabelAnova <- data.frame(SK, db, JK, KT, F_hitung, P_value) 
TabelAnova
##         SK db       JK        KT F_hitung      P_value
## 1  Regresi  6 3147.966 524.66106 10.50235 1.240412e-05
## 2 Residual 23 1149.000  49.95654       NA           NA
## 3    Total 29 4296.967        NA       NA           NA
se_b <- sqrt(sigma_kuadrat[1]*solve(t(x)%*%x)) 
## Warning in sqrt(sigma_kuadrat[1] * solve(t(x) %*% x)): NaNs produced
se_b0 <- se_b[1,1];round(se_b0,4)
## [1] 11.3452
se_b1 <- se_b[2,2];round(se_b1,4)
## [1] 0.1576
se_b2 <- se_b[3,3];round(se_b2,4)
## [1] 0.1329
se_b3 <- se_b[4,4];round(se_b3,4)
## [1] 0.165
se_b4 <- se_b[5,5];round(se_b4,4)
## [1] 0.2168
se_b5 <- se_b[6,6];round(se_b5,4)
## [1] 0.1439
se_b6 <- se_b[7,7];round(se_b6,4)
## [1] 0.1745
#uji hipotesis bagi parameter regresi secara parsial (nilai-t)
#t-value
t_b0 <- b0/se_b0;round(t_b0,2)
## [1] 0.95
t_b1 <- b1/se_b1;round(t_b1,2)
## [1] 3.89
t_b2 <- b2/se_b2;round(t_b2,2)
## [1] -0.55
t_b3 <- b3/se_b3;round(t_b3,2)
## [1] 1.94
t_b4 <- b4/se_b4;round(t_b4,2)
## [1] 0.38
t_b5 <- b5/se_b5;round(t_b5,2)
## [1] 0.27
t_b6 <- b6/se_b6;round(t_b6,2)
## [1] -1.24
#p-value
2*pt(-abs(t_b0 ),df <- n-p)
## [1] 0.3511828
2*pt(-abs(t_b1 ),df <- n-p)
## [1] 0.0006937845
2*pt(-abs(t_b2 ),df <- n-p)
## [1] 0.5875377
2*pt(-abs(t_b3 ),df <- n-p)
## [1] 0.06399683
2*pt(-abs(t_b4 ),df <- n-p)
## [1] 0.70951
2*pt(-abs(t_b5 ),df <- n-p)
## [1] 0.7919614
2*pt(-abs(t_b6 ),df <- n-p)
## [1] 0.225447
#pembentukan model dengan fungsi lm
reg <- lm(y~x1+x2+x3+x4+x5+x6, data= Data) 
summary(reg)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6, data = Data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.9418  -4.3555   0.3158   5.5425  11.5990 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.78708   11.58926   0.931 0.361634    
## x1           0.61319    0.16098   3.809 0.000903 ***
## x2          -0.07305    0.13572  -0.538 0.595594    
## x3           0.32033    0.16852   1.901 0.069925 .  
## x4           0.08173    0.22148   0.369 0.715480    
## x5           0.03838    0.14700   0.261 0.796334    
## x6          -0.21706    0.17821  -1.218 0.235577    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.068 on 23 degrees of freedom
## Multiple R-squared:  0.7326, Adjusted R-squared:  0.6628 
## F-statistic:  10.5 on 6 and 23 DF,  p-value: 1.24e-05
cor(Data)
## Warning in cor(Data): the standard deviation is zero
##            y x0        x1        x2        x3        x4        x5        x6
## y  1.0000000 NA 0.8254176 0.4261169 0.6236782 0.5901390 0.1564392 0.1550863
## x0        NA  1        NA        NA        NA        NA        NA        NA
## x1 0.8254176 NA 1.0000000 0.5582882 0.5967358 0.6691975 0.1877143 0.2245796
## x2 0.4261169 NA 0.5582882 1.0000000 0.4933310 0.4454779 0.1472331 0.3432934
## x3 0.6236782 NA 0.5967358 0.4933310 1.0000000 0.6403144 0.1159652 0.5316198
## x4 0.5901390 NA 0.6691975 0.4454779 0.6403144 1.0000000 0.3768830 0.5741862
## x5 0.1564392 NA 0.1877143 0.1472331 0.1159652 0.3768830 1.0000000 0.2833432
## x6 0.1550863 NA 0.2245796 0.3432934 0.5316198 0.5741862 0.2833432 1.0000000
#pemeriksaan antara variabel x1, x3, x4
reg <- lm(y~x1+x3+x4, data= Data) 
summary(reg)
## 
## Call:
## lm(formula = y ~ x1 + x3 + x4, data = Data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.6282  -5.8107   0.5115   6.3946  10.3509 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.52260    8.30481   1.267    0.216    
## x1           0.65349    0.13637   4.792 5.82e-05 ***
## x3           0.22069    0.14967   1.475    0.152    
## x4          -0.02864    0.18245  -0.157    0.876    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.943 on 26 degrees of freedom
## Multiple R-squared:  0.7083, Adjusted R-squared:  0.6746 
## F-statistic: 21.04 on 3 and 26 DF,  p-value: 3.957e-07
#pemeriksaan antara variabel x2, x5, x6
reg <- lm(y~x2+x5+x6, data= Data) 
summary(reg)
## 
## Call:
## lm(formula = y ~ x2 + x5 + x6, data = Data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.0229  -5.0810  -0.6476   5.0387  22.7583 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 34.25069   17.97030   1.906   0.0678 .
## x2           0.41490    0.18716   2.217   0.0356 *
## x5           0.12260    0.22665   0.541   0.5932  
## x6          -0.01931    0.22955  -0.084   0.9336  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.56 on 26 degrees of freedom
## Multiple R-squared:  0.1908, Adjusted R-squared:  0.0974 
## F-statistic: 2.043 on 3 and 26 DF,  p-value: 0.1324
#pemeriksaan antara variabel x1, x3, x6
reg <- lm(y~x1+x3+x6, data= Data) 
summary(reg)
## 
## Call:
## lm(formula = y ~ x1 + x3 + x6, data = Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.217  -5.377   0.967   6.078  11.540 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  13.5777     7.5439   1.800   0.0835 .  
## x1            0.6227     0.1181   5.271 1.65e-05 ***
## x3            0.3124     0.1542   2.026   0.0532 .  
## x6           -0.1870     0.1449  -1.291   0.2082    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.734 on 26 degrees of freedom
## Multiple R-squared:  0.7256, Adjusted R-squared:  0.6939 
## F-statistic: 22.92 on 3 and 26 DF,  p-value: 1.807e-07
#model terbalik
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.3.2
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
ols_step_best_subset(reg)
## Best Subsets Regression
## -----------------------
## Model Index    Predictors
## -----------------------
##      1         x1       
##      2         x1 x3    
##      3         x1 x3 x6 
## -----------------------
## 
##                                                     Subsets Regression Summary                                                     
## -----------------------------------------------------------------------------------------------------------------------------------
##                        Adj.        Pred                                                                                             
## Model    R-Square    R-Square    R-Square     C(p)       AIC         SBIC        SBC         MSEP         FPE       HSP       APC  
## -----------------------------------------------------------------------------------------------------------------------------------
##   1        0.6813      0.6699      0.6379    4.1956    205.7638    120.6015    209.9674    1467.4370    52.1669    1.8114    0.3642 
##   2        0.7080      0.6864      0.6402    3.6657    205.1387    120.4944    210.7435    1396.1991    51.1153    1.7872    0.3569 
##   3        0.7256      0.6939       0.642    4.0000    205.2758    121.3229    212.2818    1364.6223    51.3971    1.8140    0.3588 
## -----------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria 
##  SBIC: Sawa's Bayesian Information Criteria 
##  SBC: Schwarz Bayesian Criteria 
##  MSEP: Estimated error of prediction, assuming multivariate normality 
##  FPE: Final Prediction Error 
##  HSP: Hocking's Sp 
##  APC: Amemiya Prediction Criteria
#selang kepercayaan (1-alfa)*100%
t <- qt(.975, df <- n-p) 
BB_b0 <- b0-t*se_b0 
BA_b0 <- b0+t*se_b0 

BB_b1 <- b1-t*se_b1 
BA_b1 <- b1+t*se_b1 

BB_b3 <- b3-t*se_b3 
BA_b3 <- b3+t*se_b3 

BB_b6 <- b6-t*se_b6 
BA_b6 <- b6+t*se_b6

Batas_Bawah <- as.matrix(c(round(BB_b0,6),round(BB_b1,6),round(BB_b3,6), round(BB_b6,6))) 
Batas_Atas <- as.matrix(c(round(BA_b0,6),round(BA_b1,6),round(BA_b3,6), round(BA_b6,6)))

selangKepercayaan <- cbind(Batas_Bawah, Batas_Atas)
colnames(selangKepercayaan ) <- c("Batas bawah Selang (2.5%)", "Batas atas Selang (97.5%)") 
rownames(selangKepercayaan ) <- c("Intersept", "b1", "b3", "b6")
selangKepercayaan
##           Batas bawah Selang (2.5%) Batas atas Selang (97.5%)
## Intersept                -12.628360                 34.202512
## b1                         0.287930                  0.938445
## b3                        -0.020154                  0.660818
## b6                        -0.577119                  0.143005