library(car)
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
data(Salaries) 
head(Salaries) 
##        rank discipline yrs.since.phd yrs.service  sex salary
## 1      Prof          B            19          18 Male 139750
## 2      Prof          B            20          16 Male 173200
## 3  AsstProf          B             4           3 Male  79750
## 4      Prof          B            45          39 Male 115000
## 5      Prof          B            40          41 Male 141500
## 6 AssocProf          B             6           6 Male  97000
str(Salaries)
## 'data.frame':    397 obs. of  6 variables:
##  $ rank         : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
##  $ discipline   : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
##  $ yrs.since.phd: int  19 20 4 45 40 6 30 45 21 18 ...
##  $ yrs.service  : int  18 16 3 39 41 6 23 45 20 18 ...
##  $ sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
##  $ salary       : int  139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...
attach(Salaries)

Proses memanggil data pada package dan melihat 6 observasi pertama pada data, kemudian dilakukan pengecekan str untuk melihat tipe data.

summary(Salaries) #statistika deskriptif
##         rank     discipline yrs.since.phd    yrs.service        sex     
##  AsstProf : 67   A:181      Min.   : 1.00   Min.   : 0.00   Female: 39  
##  AssocProf: 64   B:216      1st Qu.:12.00   1st Qu.: 7.00   Male  :358  
##  Prof     :266              Median :21.00   Median :16.00               
##                             Mean   :22.31   Mean   :17.61               
##                             3rd Qu.:32.00   3rd Qu.:27.00               
##                             Max.   :56.00   Max.   :60.00               
##      salary      
##  Min.   : 57800  
##  1st Qu.: 91000  
##  Median :107300  
##  Mean   :113706  
##  3rd Qu.:134185  
##  Max.   :231545
#var numberik
Vnum=cbind(yrs.since.phd,yrs.service,salary)
head(Vnum)
##      yrs.since.phd yrs.service salary
## [1,]            19          18 139750
## [2,]            20          16 173200
## [3,]             4           3  79750
## [4,]            45          39 115000
## [5,]            40          41 141500
## [6,]             6           6  97000
pairs(Vnum) #membuat matriks scatter plot

#Variabel kategorik
par(mfrow=c(1,3))
boxplot(salary~rank)
boxplot(salary~discipline)
boxplot(salary~sex)

Membuat matriks scatter plot dengan melihat statistika deskriptif dan boxplot pada variabel kategorik

model1=lm(salary~yrs.service + yrs.since.phd + rank + discipline + sex, data=Salaries)
resi=model1$residuals #menyimpan resdiual          
yfitted=model1$fitted.values #menyimpan fitted values
summary(model1)
## 
## Call:
## lm(formula = salary ~ yrs.service + yrs.since.phd + rank + discipline + 
##     sex, data = Salaries)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -65248 -13211  -1775  10384  99592 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    65955.2     4588.6  14.374  < 2e-16 ***
## yrs.service     -489.5      211.9  -2.310  0.02143 *  
## yrs.since.phd    535.1      241.0   2.220  0.02698 *  
## rankAssocProf  12907.6     4145.3   3.114  0.00198 ** 
## rankProf       45066.0     4237.5  10.635  < 2e-16 ***
## disciplineB    14417.6     2342.9   6.154 1.88e-09 ***
## sexMale         4783.5     3858.7   1.240  0.21584    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22540 on 390 degrees of freedom
## Multiple R-squared:  0.4547, Adjusted R-squared:  0.4463 
## F-statistic:  54.2 on 6 and 390 DF,  p-value: < 2.2e-16

menampilkan ringkasan statistik dari model regresi atau objek statistik lain yang telah dibuat.

library(regclass)
## Warning: package 'regclass' was built under R version 4.3.3
## Loading required package: bestglm
## Warning: package 'bestglm' was built under R version 4.3.3
## Loading required package: leaps
## Warning: package 'leaps' was built under R version 4.3.3
## Loading required package: VGAM
## Warning: package 'VGAM' was built under R version 4.3.3
## Loading required package: stats4
## Loading required package: splines
## 
## Attaching package: 'VGAM'
## The following object is masked from 'package:car':
## 
##     logit
## Loading required package: rpart
## Loading required package: randomForest
## Warning: package 'randomForest' was built under R version 4.3.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## Important regclass change from 1.3:
## All functions that had a . in the name now have an _
## all.correlations -> all_correlations, cor.demo -> cor_demo, etc.
VIF(model1)
##                   GVIF Df GVIF^(1/(2*Df))
## yrs.service   5.923038  1        2.433729
## yrs.since.phd 7.518936  1        2.742068
## rank          2.013193  2        1.191163
## discipline    1.064105  1        1.031555
## sex           1.030805  1        1.015285
#grafik
hist(resi) #plot histogram

qqnorm(resi) #produces a normal QQ plot of the variale
qqline(resi) #adds a reference line

boxplot(resi) #boxplot untuk cek outlier

Menampilkan QQ plot dari variabel

ks.test(resi,pnorm,mean=mean(resi),sd=sd(resi)) #KS test
## Warning in ks.test.default(resi, pnorm, mean = mean(resi), sd = sd(resi)): ties
## should not be present for the Kolmogorov-Smirnov test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  resi
## D = 0.08294, p-value = 0.00849
## alternative hypothesis: two-sided
shapiro.test(resi) #shapiro wilks test
## 
##  Shapiro-Wilk normality test
## 
## data:  resi
## W = 0.96857, p-value = 1.555e-07
library(nortest) #package uji normalitas dengan AD
 ad.test(resi) #AD test
## 
##  Anderson-Darling normality test
## 
## data:  resi
## A = 3.1395, p-value = 6.965e-08

Melakukan uji kolmogorov-smirnov, uji shapiro wilk, dan uji anderson-darling untuk menguji apakah suatu data berasal dari distribusi normal.

 plot(yfitted,resi) #plot antara ytopi residual

 library(lmtest) #package untuk uji breusch pagan
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'lmtest'
## The following object is masked from 'package:VGAM':
## 
##     lrtest
 bptest(model1)
## 
##  studentized Breusch-Pagan test
## 
## data:  model1
## BP = 65.055, df = 6, p-value = 4.205e-12
library(skedastic) #package untuk uji glejser
## Warning: package 'skedastic' was built under R version 4.3.3
glejser(model1) 
## # A tibble: 1 × 4
##   statistic  p.value parameter alternative
##       <dbl>    <dbl>     <dbl> <chr>      
## 1      136. 6.32e-27         6 greater
plot(yfitted,resi) #plot antara ytopi dengan residual

Untuk menguji apakah residual dari model regresi memiliki varian yang sama (homoskedastisitas). Pengecekan apakah model regresi memenuhi asumsi bahwa error-nya punya varian yang konstan (homoskedastisitas), baik secara visual maupun dengan uji statistik (Breusch-Pagan & Glejser).

plot(as.ts(resi)) #plot antara waktu dengan residual

pacf(resi) #PACF Residual

#Runs test menguji acak, jika kurang dari p value maka tidak acak
library(DescTools)
## Warning: package 'DescTools' was built under R version 4.3.3
## Registered S3 methods overwritten by 'proxy':
##   method               from    
##   print.registry_field registry
##   print.registry_entry registry
## 
## Attaching package: 'DescTools'
## The following object is masked from 'package:regclass':
## 
##     VIF
## The following object is masked from 'package:VGAM':
## 
##     Rank
## The following object is masked from 'package:car':
## 
##     Recode
RunsTest(resi,exact=FALSE)
## 
##  Runs Test for Randomness
## 
## data:  resi
## z = -2.613, runs = 173, m = 199, n = 198, p-value = 0.008975
## alternative hypothesis: true number of runs is not equal the expected number
## sample estimates:
## median(x) 
## -1774.802
#durbin watson uji autokorelasi, jika kurang dari p-val maka terdapat autokorelasi
dwtest(model1,alternative="two.sided") #uji durbin watson
## 
##  Durbin-Watson test
## 
## data:  model1
## DW = 1.9188, p-value = 0.3853
## alternative hypothesis: true autocorrelation is not 0

Menguji apakah residual dari model regresi bersifat independen atau acak. Pengujian dilakukan secara visual melalui plot residual terhadap waktu dan grafik PACF, serta secara statistik menggunakan uji Runs Test dan Durbin-Watson. Jika hasil menunjukkan pola atau p-value < 0.05, maka residual tidak independen atau mengandung autokorelasi, yang berarti asumsi klasik regresi dilanggar.