# Pin the CRAN mirror so package installation also works non-interactively.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Install 'psych' only when it is missing; an unconditional install.packages()
# downloads and reinstalls the package every time the document is run.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
## Installing package into 'C:/Users/Fathoni Sabri/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'psych' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Fathoni Sabri\AppData\Local\Temp\RtmpkVzU2V\downloaded_packages
library(readxl)

Input Data

# Load the housing data set from the Excel workbook and display it.
data <- readxl::read_excel(
  path = "C:/Users/Fathoni Sabri/Downloads/Copy of data_rumah_bersih(d).xlsx"
)
data
## # A tibble: 260 × 7
##        a    KT    KM    GM    LT    LB Lok            
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>          
##  1  3.95     4     4     1   135   230 Jakarta Selatan
##  2  2.7      3     3     1   122   230 Jakarta Utara  
##  3  4.2      3     3     1   120   224 Jakarta Utara  
##  4  2        3     3     2   150   220 Jakarta Barat  
##  5  4.49     3     3     1   120   220 Jakarta Utara  
##  6  4.5      3     3     2   110   220 Jakarta Utara  
##  7  4        4     4     1   105   220 Jakarta Utara  
##  8  3.6      3     3     2   101   220 Jakarta Selatan
##  9  4.95     4     3     1   126   216 Jakarta Utara  
## 10  3.5      4     4     1   100   215 Jakarta Utara  
## # ℹ 250 more rows
# Install 'psych' only if it is not available yet; reinstalling it on every
# run is unnecessary.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
## Installing package into 'C:/Users/Fathoni Sabri/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'psych' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Fathoni Sabri\AppData\Local\Temp\RtmpkVzU2V\downloaded_packages
library(psych)

Eksplorasi Data

# Scatter-plot matrix of all columns for a first look at the data.
psych::pairs.panels(x = data)

# Install 'dplyr' only when it is missing. Reinstalling a package that is
# already installed and in use fails on Windows ("cannot remove prior
# installation ... Permission denied"), so the install is guarded.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
## Installing package into 'C:/Users/Fathoni Sabri/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'dplyr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'dplyr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\Fathoni
## Sabri\AppData\Local\R\win-library\4.5\00LOCK\dplyr\libs\x64\dplyr.dll to
## C:\Users\Fathoni
## Sabri\AppData\Local\R\win-library\4.5\dplyr\libs\x64\dplyr.dll: Permission
## denied
## Warning: restored 'dplyr'
## 
## The downloaded binary packages are in
##  C:\Users\Fathoni Sabri\AppData\Local\Temp\RtmpkVzU2V\downloaded_packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Menghapus Peubah yang Tidak Digunakan

# Drop the Lok column, which is not used in the regression.
# select() is namespace-qualified because MASS, attached later in this
# analysis, masks dplyr's select(); the unqualified call fails when this step
# is re-run in the same session after library(MASS).
data <- dplyr::select(data, -Lok)
str(data)
## tibble [260 × 6] (S3: tbl_df/tbl/data.frame)
##  $ a : num [1:260] 3.95 2.7 4.2 2 4.49 4.5 4 3.6 4.95 3.5 ...
##  $ KT: num [1:260] 4 3 3 3 3 3 4 3 4 4 ...
##  $ KM: num [1:260] 4 3 3 3 3 3 4 3 3 4 ...
##  $ GM: num [1:260] 1 1 1 2 1 2 1 2 1 1 ...
##  $ LT: num [1:260] 135 122 120 150 120 110 105 101 126 100 ...
##  $ LB: num [1:260] 230 230 224 220 220 220 220 220 216 215 ...

Analisis Regresi Linier Berganda

Pemodelan

# Full model: response a regressed on every other column of the data.
formula1 <- a ~ .

Seleksi Peubah

Backward

# Backward elimination: start from the full model and drop terms by AIC.
step(
  object = lm(formula = formula1, data = data),
  direction = "backward"
)
## Start:  AIC=-121.74
## a ~ KT + KM + GM + LT + LB
## 
##        Df Sum of Sq    RSS      AIC
## - GM    1    0.0171 155.46 -123.711
## <none>              155.45 -121.740
## - KM    1    4.9688 160.41 -115.559
## - KT    1    5.0850 160.53 -115.371
## - LT    1   23.0940 178.54  -87.726
## - LB    1   24.0591 179.50  -86.324
## 
## Step:  AIC=-123.71
## a ~ KT + KM + LT + LB
## 
##        Df Sum of Sq    RSS      AIC
## <none>              155.46 -123.711
## - KT    1    5.1560 160.62 -117.228
## - KM    1    5.2624 160.73 -117.056
## - LT    1   23.4242 178.89  -89.221
## - LB    1   24.0422 179.50  -88.324
## 
## Call:
## lm(formula = a ~ KT + KM + LT + LB, data = data)
## 
## Coefficients:
## (Intercept)           KT           KM           LT           LB  
##    0.086546    -0.241140     0.247753     0.011932     0.009493

Forward

# Forward selection: start from the intercept-only model and add, by AIC,
# terms taken from the candidate set given in scope.
step(
  object = lm(formula = a ~ 1, data = data),
  scope = ~ KT + KM + GM + LT + LB,
  direction = "forward"
)
## Start:  AIC=2.79
## a ~ 1
## 
##        Df Sum of Sq    RSS     AIC
## + LB    1    79.043 181.75 -89.090
## + LT    1    57.845 202.95 -60.408
## + KM    1    25.618 235.18 -22.089
## + KT    1     6.909 253.88  -2.188
## + GM    1     2.300 258.50   2.490
## <none>              260.80   2.793
## 
## Step:  AIC=-89.09
## a ~ LB
## 
##        Df Sum of Sq    RSS      AIC
## + LT    1   18.2239 163.53 -114.561
## <none>              181.75  -89.090
## + KT    1    1.2707 180.48  -88.914
## + KM    1    0.8498 180.90  -88.309
## + GM    1    0.6544 181.10  -88.028
## 
## Step:  AIC=-114.56
## a ~ LB + LT
## 
##        Df Sum of Sq    RSS     AIC
## + KM    1   2.90892 160.62 -117.23
## + KT    1   2.80247 160.73 -117.06
## <none>              163.53 -114.56
## + GM    1   0.34589 163.18 -113.11
## 
## Step:  AIC=-117.23
## a ~ LB + LT + KM
## 
##        Df Sum of Sq    RSS     AIC
## + KT    1    5.1560 155.46 -123.71
## <none>              160.62 -117.23
## + GM    1    0.0881 160.53 -115.37
## 
## Step:  AIC=-123.71
## a ~ LB + LT + KM + KT
## 
##        Df Sum of Sq    RSS     AIC
## <none>              155.46 -123.71
## + GM    1  0.017111 155.45 -121.74
## 
## Call:
## lm(formula = a ~ LB + LT + KM + KT, data = data)
## 
## Coefficients:
## (Intercept)           LB           LT           KM           KT  
##    0.086546     0.009493     0.011932     0.247753    -0.241140

Stepwise

# Stepwise selection: start from the full model; terms may be dropped or
# re-added at each step according to AIC.
step(
  object = lm(formula = formula1, data = data),
  direction = "both"
)
## Start:  AIC=-121.74
## a ~ KT + KM + GM + LT + LB
## 
##        Df Sum of Sq    RSS      AIC
## - GM    1    0.0171 155.46 -123.711
## <none>              155.45 -121.740
## - KM    1    4.9688 160.41 -115.559
## - KT    1    5.0850 160.53 -115.371
## - LT    1   23.0940 178.54  -87.726
## - LB    1   24.0591 179.50  -86.324
## 
## Step:  AIC=-123.71
## a ~ KT + KM + LT + LB
## 
##        Df Sum of Sq    RSS      AIC
## <none>              155.46 -123.711
## + GM    1    0.0171 155.45 -121.740
## - KT    1    5.1560 160.62 -117.228
## - KM    1    5.2624 160.73 -117.056
## - LT    1   23.4242 178.89  -89.221
## - LB    1   24.0422 179.50  -88.324
## 
## Call:
## lm(formula = a ~ KT + KM + LT + LB, data = data)
## 
## Coefficients:
## (Intercept)           KT           KM           LT           LB  
##    0.086546    -0.241140     0.247753     0.011932     0.009493

Pengujian Asumsi

library(sandwich)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(nortest)
# Fit the four-predictor model (GM excluded) and show its summary.
model2 <- lm(a ~ KT + KM + LT + LB, data = data)
summary(object = model2)
## 
## Call:
## lm(formula = a ~ KT + KM + LT + LB, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1214 -0.4941 -0.1179  0.4620  2.0776 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.086546   0.284979   0.304  0.76161    
## KT          -0.241140   0.082920  -2.908  0.00396 ** 
## KM           0.247753   0.084327   2.938  0.00361 ** 
## LT           0.011932   0.001925   6.199 2.28e-09 ***
## LB           0.009493   0.001512   6.280 1.45e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7808 on 255 degrees of freedom
## Multiple R-squared:  0.4039, Adjusted R-squared:  0.3945 
## F-statistic: 43.19 on 4 and 255 DF,  p-value: < 2.2e-16

Autokorelasi

# Durbin-Watson test for autocorrelation in the residuals of model2.
# The result is stored once so the test is not computed a second time.
uji_autokol_2 <- dwtest(model2)
uji_autokol_2
## 
##  Durbin-Watson test
## 
## data:  model2
## DW = 2.049, p-value = 0.6137
## alternative hypothesis: true autocorrelation is greater than 0
# Use the full element name p.value (`$p` only worked via partial matching),
# and a plain if/else because the decision is made on a single value.
if (uji_autokol_2$p.value < 0.05) "Ada Autokorelasi" else "Tidak Ada Autokorelasi"
## [1] "Tidak Ada Autokorelasi"

Homogenitas

# Studentized Breusch-Pagan test for constant residual variance in model2.
homogen_model2 <- bptest(model2)
homogen_model2
## 
##  studentized Breusch-Pagan test
## 
## data:  model2
## BP = 11.929, df = 4, p-value = 0.01789
# Label the outcome at the 5% level.
ifelse(
  test = homogen_model2$p.value < 0.05,
  yes = "Ragam Tidak Homogen",
  no = "Ragam Homogen"
)
##                    BP 
## "Ragam Tidak Homogen"

Normalitas

# Residuals of model2, used by both normality tests below.
sisaan_model2 <- residuals(model2)

# Lilliefors (Kolmogorov-Smirnov) test on the residuals.
norm_model2 <- lillie.test(sisaan_model2)
norm_model2
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  sisaan_model2
## D = 0.067074, p-value = 0.006569
ifelse(
  test = norm_model2$p.value < 0.05,
  yes = "Sisaan Tidak Menyebar Normal",
  no = "Sisaan Menyebar Normal"
)
## [1] "Sisaan Tidak Menyebar Normal"
# Shapiro-Wilk test on the same residuals.
norm2_model2 <- shapiro.test(sisaan_model2)
norm2_model2
## 
##  Shapiro-Wilk normality test
## 
## data:  sisaan_model2
## W = 0.98871, p-value = 0.03999
ifelse(
  test = norm2_model2$p.value < 0.05,
  yes = "Sisaan Tidak Menyebar Normal",
  no = "Sisaan Menyebar Normal"
)
## [1] "Sisaan Tidak Menyebar Normal"

Penanganan

Ternyata terdapat heteroskedastisitas dan sisaan tidak menyebar normal. Oleh karena itu, perlu dilakukan penanganan dengan beberapa opsi:

1. Transformasi Variabel Dependen

library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# Box-Cox search for lambda: evaluate the profile over boxcox()'s lambda grid
# without plotting, then keep the lambda at which the profile is largest.
bc <- boxcox(object = model2, plotit = FALSE)
lambda <- bc[["x"]][which.max(bc[["y"]])]
lambda
## [1] 0.5
# Working copy of the data that receives the transformed response columns.
data_z <- data

Karena lambda mendekati 0.5, transformasi yang dilakukan adalah mengubah peubah dependen menjadi bentuk akar kuadratnya.

# Box-Cox transform of the response with lambda = 0.5
data_z[["a_t"]] <- (data[["a"]]^0.5 - 1) / 0.5
# Alternative: plain square root of the response
data_z[["a_sqrt"]] <- sqrt(data_z[["a"]])

# Same four predictors, fitted once per transformed response
model_t <- lm(formula = a_t ~ KT + KM + LT + LB, data = data_z)
model_sqrt <- lm(formula = a_sqrt ~ KT + KM + LT + LB, data = data_z)

summary(object = model_t)
## 
## Call:
## lm(formula = a_t ~ KT + KM + LT + LB, data = data_z)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.52835 -0.28713 -0.02742  0.29295  1.27072 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.4340238  0.1788808  -2.426  0.01595 *  
## KT          -0.1411224  0.0520485  -2.711  0.00716 ** 
## KM           0.1530332  0.0529321   2.891  0.00417 ** 
## LT           0.0070549  0.0012083   5.839  1.6e-08 ***
## LB           0.0062412  0.0009489   6.577  2.7e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4901 on 255 degrees of freedom
## Multiple R-squared:  0.4064, Adjusted R-squared:  0.3971 
## F-statistic: 43.64 on 4 and 255 DF,  p-value: < 2.2e-16
# Summary of the square-root-response model, for comparison with model_t.
summary(object = model_sqrt)
## 
## Call:
## lm(formula = a_sqrt ~ KT + KM + LT + LB, data = data_z)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76418 -0.14357 -0.01371  0.14647  0.63536 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.7829881  0.0894404   8.754 2.88e-16 ***
## KT          -0.0705612  0.0260243  -2.711  0.00716 ** 
## KM           0.0765166  0.0264661   2.891  0.00417 ** 
## LT           0.0035274  0.0006042   5.839 1.60e-08 ***
## LB           0.0031206  0.0004744   6.577 2.70e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2451 on 255 degrees of freedom
## Multiple R-squared:  0.4064, Adjusted R-squared:  0.3971 
## F-statistic: 43.64 on 4 and 255 DF,  p-value: < 2.2e-16

model_t

# Arrange the lm diagnostic plots of model_t in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(model_t)

# Coefficient tests using heteroskedasticity-consistent (HC1) standard errors.
library(sandwich)
library(lmtest)
# coeftest() names this argument `vcov.` (trailing dot); the previous `vcov =`
# only worked through partial argument matching.
coeftest(model_t, vcov. = vcovHC(model_t, type = "HC1"))
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) -0.4340238  0.1700174 -2.5528  0.011269 *  
## KT          -0.1411224  0.0539118 -2.6177  0.009383 ** 
## KM           0.1530332  0.0487737  3.1376  0.001903 ** 
## LT           0.0070549  0.0012317  5.7279 2.855e-08 ***
## LB           0.0062412  0.0009907  6.2998 1.299e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

model_sqrt

# Arrange the lm diagnostic plots of model_sqrt in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(model_sqrt)

# Coefficient tests using heteroskedasticity-consistent (HC1) standard errors.
# coeftest() names this argument `vcov.` (trailing dot); the previous `vcov =`
# only worked through partial argument matching.
coeftest(model_sqrt, vcov. = vcovHC(model_sqrt, type = "HC1"))
## 
## t test of coefficients:
## 
##                Estimate  Std. Error t value  Pr(>|t|)    
## (Intercept)  0.78298811  0.08500870  9.2107 < 2.2e-16 ***
## KT          -0.07056121  0.02695587 -2.6177  0.009383 ** 
## KM           0.07651660  0.02438688  3.1376  0.001903 ** 
## LT           0.00352745  0.00061583  5.7279 2.855e-08 ***
## LB           0.00312058  0.00049535  6.2998 1.299e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Pemeriksaan Asumsi

Multikolinieritas

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:psych':
## 
##     logit
# Variance inflation factors of the predictors in model_t.
car::vif(mod = model_t)
##       KT       KM       LT       LB 
## 1.336472 1.463211 1.295384 1.667438

Autokorelasi

# Durbin-Watson test for autocorrelation in the residuals of model_t.
uji_autokol_t <- dwtest(model_t)
uji_autokol_t
## 
##  Durbin-Watson test
## 
## data:  model_t
## DW = 2.0424, p-value = 0.5931
## alternative hypothesis: true autocorrelation is greater than 0
# Use the full element name p.value (`$p` only worked via partial matching),
# and a plain if/else because the decision is made on a single value.
if (uji_autokol_t$p.value < 0.05) "Ada Autokorelasi" else "Tidak Ada Autokorelasi"
## [1] "Tidak Ada Autokorelasi"

Homogenitas

# Studentized Breusch-Pagan test for constant residual variance in model_t.
homogen_modelt <- bptest(model_t)
homogen_modelt
## 
##  studentized Breusch-Pagan test
## 
## data:  model_t
## BP = 6.6732, df = 4, p-value = 0.1542
# Label the outcome at the 5% level.
ifelse(
  test = homogen_modelt$p.value < 0.05,
  yes = "Ragam Tidak Homogen",
  no = "Ragam Homogen"
)
##              BP 
## "Ragam Homogen"

Normalitas: Kolmogorov-Smirnov

# Residuals of model_t, then the Lilliefors (Kolmogorov-Smirnov) test on them.
sisaan_modelt <- residuals(model_t)
norm_modelt <- lillie.test(sisaan_modelt)
norm_modelt
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  sisaan_modelt
## D = 0.042947, p-value = 0.2876
ifelse(
  test = norm_modelt$p.value < 0.05,
  yes = "Sisaan Tidak Menyebar Normal",
  no = "Sisaan Menyebar Normal"
)
## [1] "Sisaan Menyebar Normal"

Normalitas: Shapiro-Wilk

# Shapiro-Wilk normality test on the residuals of model_t.
norm2_modelt <- shapiro.test(sisaan_modelt)
norm2_modelt
## 
##  Shapiro-Wilk normality test
## 
## data:  sisaan_modelt
## W = 0.99454, p-value = 0.4765
ifelse(
  test = norm2_modelt$p.value < 0.05,
  yes = "Sisaan Tidak Menyebar Normal",
  no = "Sisaan Menyebar Normal"
)
## [1] "Sisaan Menyebar Normal"

R-squared

# Take the goodness-of-fit statistics from the summary of model_t.
sum_m <- summary(model_t)

r2 <- sum_m[["r.squared"]]          # R-squared
r2_adj <- sum_m[["adj.r.squared"]]  # adjusted R-squared

# Report both values rounded to four decimals.
print(paste("R-squared       :", round(r2, digits = 4)))
## [1] "R-squared       : 0.4064"
print(paste("Adjusted R-sq   :", round(r2_adj, digits = 4)))
## [1] "Adjusted R-sq   : 0.3971"

Transformasi Balik

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Example chunk from the R Markdown template: summarise the built-in cars data.
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.