library(car)
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
data(Salaries)
head(Salaries)
## rank discipline yrs.since.phd yrs.service sex salary
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
str(Salaries)
## 'data.frame': 397 obs. of 6 variables:
## $ rank : Factor w/ 3 levels "AsstProf","AssocProf",..: 3 3 1 3 3 2 3 3 3 3 ...
## $ discipline : Factor w/ 2 levels "A","B": 2 2 2 2 2 2 2 2 2 2 ...
## $ yrs.since.phd: int 19 20 4 45 40 6 30 45 21 18 ...
## $ yrs.service : int 18 16 3 39 41 6 23 45 20 18 ...
## $ sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 1 ...
## $ salary : int 139750 173200 79750 115000 141500 97000 175000 147765 119250 129000 ...
attach(Salaries)
Proses memanggil data pada package dan melihat 6 observasi pertama pada data, kemudian dilakukan pengecekan str untuk melihat tipe data.
summary(Salaries) #statistika deskriptif
## rank discipline yrs.since.phd yrs.service sex
## AsstProf : 67 A:181 Min. : 1.00 Min. : 0.00 Female: 39
## AssocProf: 64 B:216 1st Qu.:12.00 1st Qu.: 7.00 Male :358
## Prof :266 Median :21.00 Median :16.00
## Mean :22.31 Mean :17.61
## 3rd Qu.:32.00 3rd Qu.:27.00
## Max. :56.00 Max. :60.00
## salary
## Min. : 57800
## 1st Qu.: 91000
## Median :107300
## Mean :113706
## 3rd Qu.:134185
## Max. :231545
#var numberik
Vnum=cbind(yrs.since.phd,yrs.service,salary)
head(Vnum)
## yrs.since.phd yrs.service salary
## [1,] 19 18 139750
## [2,] 20 16 173200
## [3,] 4 3 79750
## [4,] 45 39 115000
## [5,] 40 41 141500
## [6,] 6 6 97000
pairs(Vnum) #membuat matriks scatter plot
#Variabel kategorik
par(mfrow=c(1,3))
boxplot(salary~rank)
boxplot(salary~discipline)
boxplot(salary~sex)
Membuat matriks scatter plot dengan melihat statistika deskriptif dan
boxplot pada variabel kategorik
model1=lm(salary~yrs.service + yrs.since.phd + rank + discipline + sex, data=Salaries)
resi=model1$residuals #menyimpan resdiual
yfitted=model1$fitted.values #menyimpan fitted values
summary(model1)
##
## Call:
## lm(formula = salary ~ yrs.service + yrs.since.phd + rank + discipline +
## sex, data = Salaries)
##
## Residuals:
## Min 1Q Median 3Q Max
## -65248 -13211 -1775 10384 99592
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65955.2 4588.6 14.374 < 2e-16 ***
## yrs.service -489.5 211.9 -2.310 0.02143 *
## yrs.since.phd 535.1 241.0 2.220 0.02698 *
## rankAssocProf 12907.6 4145.3 3.114 0.00198 **
## rankProf 45066.0 4237.5 10.635 < 2e-16 ***
## disciplineB 14417.6 2342.9 6.154 1.88e-09 ***
## sexMale 4783.5 3858.7 1.240 0.21584
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22540 on 390 degrees of freedom
## Multiple R-squared: 0.4547, Adjusted R-squared: 0.4463
## F-statistic: 54.2 on 6 and 390 DF, p-value: < 2.2e-16
menampilkan ringkasan statistik dari model regresi atau objek statistik lain yang telah dibuat.
library(regclass)
## Warning: package 'regclass' was built under R version 4.3.3
## Loading required package: bestglm
## Warning: package 'bestglm' was built under R version 4.3.3
## Loading required package: leaps
## Warning: package 'leaps' was built under R version 4.3.3
## Loading required package: VGAM
## Warning: package 'VGAM' was built under R version 4.3.3
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:car':
##
## logit
## Loading required package: rpart
## Loading required package: randomForest
## Warning: package 'randomForest' was built under R version 4.3.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## Important regclass change from 1.3:
## All functions that had a . in the name now have an _
## all.correlations -> all_correlations, cor.demo -> cor_demo, etc.
VIF(model1)
## GVIF Df GVIF^(1/(2*Df))
## yrs.service 5.923038 1 2.433729
## yrs.since.phd 7.518936 1 2.742068
## rank 2.013193 2 1.191163
## discipline 1.064105 1 1.031555
## sex 1.030805 1 1.015285
#grafik
hist(resi) #plot histogram
qqnorm(resi) #produces a normal QQ plot of the variale
qqline(resi) #adds a reference line
boxplot(resi) #boxplot untuk cek outlier
Menampilkan QQ plot dari variabel
ks.test(resi,pnorm,mean=mean(resi),sd=sd(resi)) #KS test
## Warning in ks.test.default(resi, pnorm, mean = mean(resi), sd = sd(resi)): ties
## should not be present for the Kolmogorov-Smirnov test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: resi
## D = 0.08294, p-value = 0.00849
## alternative hypothesis: two-sided
shapiro.test(resi) #shapiro wilks test
##
## Shapiro-Wilk normality test
##
## data: resi
## W = 0.96857, p-value = 1.555e-07
library(nortest) #package uji normalitas dengan AD
ad.test(resi) #AD test
##
## Anderson-Darling normality test
##
## data: resi
## A = 3.1395, p-value = 6.965e-08
Melakukan uji kolmogorov-smirnov, uji shapiro wilk, dan uji anderson-darling untuk menguji apakah suatu data berasal dari distribusi normal.
plot(yfitted,resi) #plot antara ytopi residual
library(lmtest) #package untuk uji breusch pagan
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'lmtest'
## The following object is masked from 'package:VGAM':
##
## lrtest
bptest(model1)
##
## studentized Breusch-Pagan test
##
## data: model1
## BP = 65.055, df = 6, p-value = 4.205e-12
library(skedastic) #package untuk uji glejser
## Warning: package 'skedastic' was built under R version 4.3.3
glejser(model1)
## # A tibble: 1 × 4
## statistic p.value parameter alternative
## <dbl> <dbl> <dbl> <chr>
## 1 136. 6.32e-27 6 greater
plot(yfitted,resi) #plot antara ytopi dengan residual
Untuk menguji apakah residual dari model regresi memiliki varian yang sama (homoskedastisitas). Pengecekan apakah model regresi memenuhi asumsi bahwa error-nya punya varian yang konstan (homoskedastisitas), baik secara visual maupun dengan uji statistik (Breusch-Pagan & Glejser).
plot(as.ts(resi)) #plot antara waktu dengan residual
pacf(resi) #PACF Residual
#Runs test menguji acak, jika kurang dari p value maka tidak acak
library(DescTools)
## Warning: package 'DescTools' was built under R version 4.3.3
## Registered S3 methods overwritten by 'proxy':
## method from
## print.registry_field registry
## print.registry_entry registry
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:regclass':
##
## VIF
## The following object is masked from 'package:VGAM':
##
## Rank
## The following object is masked from 'package:car':
##
## Recode
RunsTest(resi,exact=FALSE)
##
## Runs Test for Randomness
##
## data: resi
## z = -2.613, runs = 173, m = 199, n = 198, p-value = 0.008975
## alternative hypothesis: true number of runs is not equal the expected number
## sample estimates:
## median(x)
## -1774.802
#durbin watson uji autokorelasi, jika kurang dari p-val maka terdapat autokorelasi
dwtest(model1,alternative="two.sided") #uji durbin watson
##
## Durbin-Watson test
##
## data: model1
## DW = 1.9188, p-value = 0.3853
## alternative hypothesis: true autocorrelation is not 0
Menguji apakah residual dari model regresi bersifat independen atau acak. Pengujian dilakukan secara visual melalui plot residual terhadap waktu dan grafik PACF, serta secara statistik menggunakan uji Runs Test dan Durbin-Watson. Jika hasil menunjukkan pola atau p-value < 0.05, maka residual tidak independen atau mengandung autokorelasi, yang berarti asumsi klasik regresi dilanggar.