Asumsi Regresi Linier Berganda

Data

Data yang digunakan dalam praktikum ini adalah data BodyFat.

dengan,

X1: Triceps Skinfold Thickness

X2: Thigh Circumference

X3: Midarm Circumference

Y: Body Fat

library(readxl)
bodyf<- read_excel("D:/data_bodyfat.xlsx")
print.data.frame(bodyf[,-1]) #menghapus kolom 1
##    Triceps_X1 Thigh_X2 Midarm_X3 Bodyfat_Y
## 1        19.5     43.1      29.1      11.9
## 2        24.7     49.8      28.2      22.8
## 3        30.7     51.9      37.0      18.7
## 4        29.8     54.3      31.1      20.1
## 5        19.1     42.2      30.9      12.9
## 6        25.6     53.9      23.7      21.7
## 7        31.4     58.5      27.6      27.1
## 8        27.9     52.1      30.6      25.4
## 9        22.1     49.9      23.2      21.3
## 10       25.5     53.5      24.8      19.3
## 11       31.1     56.6      30.0      25.4
## 12       30.4     56.7      28.3      27.2
## 13       18.7     46.5      23.0      11.7
## 14       19.7     44.2      28.6      17.8
## 15       14.6     42.7      21.3      12.8
## 16       29.5     54.4      30.1      23.9
## 17       27.7     55.3      25.7      22.6
## 18       30.2     58.6      24.6      25.4
## 19       22.7     48.2      27.1      14.8
## 20       25.2     51.0      27.5      21.1

Persamaan Regresi

modela<- lm(formula = Bodyfat_Y ~ Triceps_X1+Thigh_X2+Midarm_X3, data=bodyf)
summary(modela)
## 
## Call:
## lm(formula = Bodyfat_Y ~ Triceps_X1 + Thigh_X2 + Midarm_X3, data = bodyf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7263 -1.6111  0.3923  1.4656  4.1277 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  117.085     99.782   1.173    0.258
## Triceps_X1     4.334      3.016   1.437    0.170
## Thigh_X2      -2.857      2.582  -1.106    0.285
## Midarm_X3     -2.186      1.595  -1.370    0.190
## 
## Residual standard error: 2.48 on 16 degrees of freedom
## Multiple R-squared:  0.8014, Adjusted R-squared:  0.7641 
## F-statistic: 21.52 on 3 and 16 DF,  p-value: 7.343e-06

ANOVA

anova (modela)
## Analysis of Variance Table
## 
## Response: Bodyfat_Y
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Triceps_X1  1 352.27  352.27 57.2768 1.131e-06 ***
## Thigh_X2    1  33.17   33.17  5.3931   0.03373 *  
## Midarm_X3   1  11.55   11.55  1.8773   0.18956    
## Residuals  16  98.40    6.15                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interval Kepercayaan

confint(modela, level = 0.95)
##                  2.5 %     97.5 %
## (Intercept) -94.444550 328.613940
## Triceps_X1   -2.058507  10.726691
## Thigh_X2     -8.330476   2.616780
## Midarm_X3    -5.568367   1.196247

Asumsi 1: Rata-rata galat diasumsikan bernilai nol

plot(modela$fitted.values,modela$residuals,
xlab="Fitted Values",ylab="Residuals",
main="Plot Uji Asumsi Rata-rata Galat bernilai nol")

Rata-rata Galat bernilai nol jika titik menyebar disekitar galat sama dengan nol.

Asumsi 2: Galat saling bebas atau tidak terjadi Autokorelasi

plot(modela$fitted.values,modela$residuals,
xlab="Fitted Values",ylab="Residuals",
main="Plot Uji Galat saling bebas")

Galat saling bebas jika titik menyebar tidak berpola. Atau lakukan pengujian hipotesis sebagai berikut,

library(lmtest)
dwtest(modela) # p-value < 0.05 H0 ditolak atau terjadi autokorelasi
## 
##  Durbin-Watson test
## 
## data:  modela
## DW = 2.2429, p-value = 0.6698
## alternative hypothesis: true autocorrelation is greater than 0
library(car)
dwt(modela)
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.1677286      2.242915   0.646
##  Alternative hypothesis: rho != 0

Asumsi 3: Galat berdistribusi normal

qqnorm(modela$residuals,ylab = "Raw Residuals")
qqline(modela$residuals)

Selain dengan plot, pengujian asumsi ini dapat dilakukan melalui pengujian hipotesis sebagai berikut,

shapiro.test(modela$residuals) 
## 
##  Shapiro-Wilk normality test
## 
## data:  modela$residuals
## W = 0.96603, p-value = 0.6698
library(nortest)
ad.test(modela$residuals)
## 
##  Anderson-Darling normality test
## 
## data:  modela$residuals
## A = 0.24183, p-value = 0.7372
lillie.test(modela$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  modela$residuals
## D = 0.10602, p-value = 0.8044

H0 ditolak atau Galat tidak berdistribusi normal jika p-value < alpha.

Asumsi 4: Ragam Galat diasumsikan konstan atau tidak terjadi Heteroskedastisitas

plot(modela$fitted.values,modela$residuals,
xlab="Fitted Values",ylab="Residuals",
main="Plot Uji Ragam Galat konstan")

Ragam Galat konstan jika titik menyebar tidak berpola

Atau lakukan pengujian hipotesis sebagai berikut,

gqtest(modela) #Goldfeld-Quandt test,
## 
##  Goldfeld-Quandt test
## 
## data:  modela
## GQ = 0.72256, df1 = 6, df2 = 6, p-value = 0.6484
## alternative hypothesis: variance increases from segment 1 to 2
bptest(modela) #studentized Breusch-Pagan test
## 
##  studentized Breusch-Pagan test
## 
## data:  modela
## BP = 5.1452, df = 3, p-value = 0.1615
library(car)
ncvTest(modela)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 1.264838, Df = 1, p = 0.26074

H0 ditolak atau Ragam Galat tidak konstan jika p-value < alpha

Asumsi 5: Outlier, Leverage dan Influential data

Outlier

#outlier
res.std <- rstandard(modela) #studentized residuals stored in vector res.std (Kutner, page 394)
# Identifikasi outlier berdasarkan standardized residuals
outliers <- which(abs(res.std) > 3)

# Menampilkan indeks observasi yang dianggap sebagai outlier
print(outliers)
## named integer(0)
#plot Standardized residual in y axis. X axis will be the index or row names
plot(res.std, ylab="Standardized Residual", ylim=c(-3.5,3.5))

#add horizontal lines 3 and -3 to identify extreme values
abline(h =c(-3,0,3), lty = 2)

Leverage

# Membuat plot leverage dengan faraway
leveragePlots(modela)

Influential

#influential
cooksd <- cooks.distance(modela)
head(cooksd)
##           1           2           3           4           5           6 
## 0.279049282 0.059587529 0.298962809 0.053166193 0.046928356 0.002095252
cutoff <- 4/((nrow(bodyf$Bodyfat_Y)-length(modela$coefficients)-1))
plot(modela, which = 4, cook.levels = cutoff)

plot(modela,which=5)