library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
# Read the salary data set from its CSV file.
salary_path <- "C:/Users/nandh/Desktop/SEMESTER 6/DATMIN/Analisis Regresi/Salary_dataset.csv"
salary <- read.csv(file = salary_path)
# Drop the first column; the preview below shows YearsExperience and Salary
# as the remaining columns.
salary <- salary[, -1]
# Preview the first rows.
head(salary)
## YearsExperience Salary
## 1 1.2 39344
## 2 1.4 46206
## 3 1.6 37732
## 4 2.1 43526
## 5 2.3 39892
## 6 3.0 56643
# Scatter plot of salary against years of experience.
with(
  salary,
  plot(YearsExperience, Salary,
       xlab = "Years Experience", ylab = "Salary", pch = 16)
)
# Regression parameter estimation (by hand)
x_bar <- mean(salary[["YearsExperience"]])
y_bar <- mean(salary[["Salary"]])
# Deviations from the means, their cross products and the squared x deviations
# are stored as extra columns of `salary`.
salary[["xdif"]] <- salary[["YearsExperience"]] - x_bar
salary[["ydif"]] <- salary[["Salary"]] - y_bar
salary[["crp"]] <- salary[["xdif"]] * salary[["ydif"]]
salary[["xsq"]] <- salary[["xdif"]]^2
# Slope estimate: b1 = sum of cross products / sum of squared x deviations
b1 <- sum(salary[["crp"]]) / sum(salary[["xsq"]])
b1
## [1] 9449.962
# Intercept estimate: b0 = mean(y) - b1 * mean(x)
b0 <- y_bar - b1 * x_bar
b0
## [1] 24848.2
# The same regression fitted with R's built-in lm()
model1 <- lm(Salary ~ YearsExperience, data = salary)
summary(model1)
##
## Call:
## lm(formula = Salary ~ YearsExperience, data = salary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7958.0 -4088.5 -459.9 3372.6 11448.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24848.2 2306.7 10.77 1.82e-11 ***
## YearsExperience 9450.0 378.8 24.95 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5788 on 28 degrees of freedom
## Multiple R-squared: 0.957, Adjusted R-squared: 0.9554
## F-statistic: 622.5 on 1 and 28 DF, p-value: < 2.2e-16
# Sums of squares needed for the coefficient of determination
sal_obs <- salary$Salary
sal_fit <- model1$fitted.values
sal_mean <- mean(sal_obs)
# JKG: error sum of squares (observed minus fitted)
JKG <- sum((sal_obs - sal_fit)^2)
JKG
## [1] 938128552
# JKR: regression sum of squares (fitted minus mean)
JKR <- sum((sal_fit - sal_mean)^2)
JKR
## [1] 20856849300
# JKT: total sum of squares (observed minus mean)
JKT <- sum((sal_obs - sal_mean)^2)
JKT
## [1] 21794977852
# ANOVA table from the fitted model
anova(model1)
## Analysis of Variance Table
##
## Response: Salary
## Df Sum Sq Mean Sq F value Pr(>F)
## YearsExperience 1 2.0857e+10 2.0857e+10 622.51 < 2.2e-16 ***
## Residuals 28 9.3813e+08 3.3505e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient of determination: R^2 = (JKT - JKG) / JKT
r_sqr <- (JKT - JKG) / JKT
r_sqr
## [1] 0.9569567
# Critical F value at the 5% level.
# Numerator df is 1 (one predictor); the denominator df is read from the
# fitted model instead of being typed in, so it follows the data
# (28 for this data set, as reported by summary(model1)).
df_galat_model1 <- df.residual(model1)
qf(0.95, 1, df_galat_model1)
## [1] 4.195972
# t test for the slope b1
n_salary <- length(salary$Salary)
# Mean squared error: error sum of squares over n - 2 degrees of freedom
MSE <- JKG / (n_salary - 2)
MSE
## [1] 33504591
# Estimated variance and standard error of b1: MSE / sum((x - mean(x))^2)
sb1sqr <- MSE / sum((salary$YearsExperience - mean(salary$YearsExperience))^2)
sb1 <- sqrt(sb1sqr)
# Use the slope b1 estimated earlier in this script at full precision.
# It was previously overwritten here with 9450.0, a value copied by hand
# from the rounded summary() table, which biased the statistic and left a
# rounded b1 behind for any later code.
tb1 <- b1 / sb1
tb1
# With the unrounded slope the statistic is approximately 24.9501
# (the hand-typed 9450.0 produced 24.95019).
# Two-sided 5% critical value with n - 2 degrees of freedom
ttabel <- qt(0.975, n_salary - 2)
ttabel
## [1] 2.048407
# t test for the intercept b0
n_obs <- length(salary$Salary)
x_vals <- salary$YearsExperience
x_mean <- mean(x_vals)
sxx <- sum((x_vals - x_mean)^2)
# Estimated variance of b0: MSE * (1 / n + mean(x)^2 / sum((x - mean(x))^2))
sb0sqr <- MSE * (1 / n_obs + (x_mean^2) / sxx)
sb0 <- sqrt(sb0sqr)
tb0 <- b0 / sb0
tb0
## [1] 10.7724
# Two-sided 5% critical value with n - 2 degrees of freedom
ttabel <- qt(0.975, n_obs - 2)
ttabel
## [1] 2.048407
# 95% confidence intervals for both coefficients
confint(model1, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 20123.238 29573.17
## YearsExperience 8674.119 10225.81
# Assumption checks for model1
# The test calls below are kept verbatim: their printed "data:" line echoes
# the argument expression exactly as written.
qqnorm(model1$residuals,ylab = "Residuals") # Normality: normal Q-Q plot of the residuals
qqline(model1$residuals)
# Anderson-Darling normality test (from the nortest package)
library(nortest)
ad.test(model1$residuals)
##
## Anderson-Darling normality test
##
## data: model1$residuals
## A = 0.35904, p-value = 0.428
# Shapiro-Wilk normality test
shapiro.test(model1$residuals)
##
## Shapiro-Wilk normality test
##
## data: model1$residuals
## W = 0.95234, p-value = 0.1952
# Lilliefors (Kolmogorov-Smirnov) normality test
lillie.test(model1$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: model1$residuals
## D = 0.083446, p-value = 0.8572
# Homoscedasticity: residuals plotted against fitted values
fit_vals1 <- model1$fitted.values
res_vals1 <- model1$residuals
plot(
  x = fit_vals1, y = res_vals1,
  main = "Plot Uji Ragam Galat konstan",
  xlab = "Fitted Values", ylab = "Residuals"
)
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(model1)  # studentized Breusch-Pagan test
##
## studentized Breusch-Pagan test
##
## data: model1
## BP = 0.39905, df = 1, p-value = 0.5276
# No autocorrelation
# NOTE(review): this draws the same residual-vs-fitted scatter as above under
# a different title; a plot against observation order may have been intended
# - confirm.
plot(
  x = fit_vals1, y = res_vals1,
  main = "Plot Non-Autokorelasi",
  xlab = "Fitted Values", ylab = "Residuals"
)
library(lmtest)
dwtest(model1)
##
## Durbin-Watson test
##
## data: model1
## DW = 1.648, p-value = 0.1178
## alternative hypothesis: true autocorrelation is greater than 0
# MULTIPLE LINEAR REGRESSION
library(readxl)
# Read the house-price workbook and preview its first rows.
harga_path <- "C:/Users/nandh/Desktop/SEMESTER 6/DATMIN/Analisis Regresi/hargarumah.xlsx"
harga <- read_excel(path = harga_path)
head(harga)
## # A tibble: 6 × 4
## Luas_Tanah_X1 Kamar_TidurX2 Kamar_MandiX3 Harga_RumahY
## <dbl> <dbl> <dbl> <dbl>
## 1 150 3 2 250000
## 2 200 4 3 300000
## 3 120 2 1 180000
## 4 250 4 2 320000
## 5 180 3 2 270000
## 6 300 5 3 380000
# Scatter-plot matrix of the three predictor columns of `harga`.
# NOTE(review): cbind() on `$` expressions builds a matrix without column
# names, so the panels may not be labelled with the variable names - confirm.
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
pairs.panels(cbind(harga$Luas_Tanah_X1 ,harga$Kamar_TidurX2,harga$Kamar_MandiX3))
# Regression parameter estimation in matrix form
Y <- matrix(harga$Harga_RumahY)
N <- nrow(Y)  # number of rows of Y, i.e. number of observations
# Design matrix: a column of ones followed by the three predictors
X <- cbind(
  1,
  harga$Luas_Tanah_X1,
  harga$Kamar_TidurX2,
  harga$Kamar_MandiX3
)
# N x N matrix of ones, used later for the total sum of squares
J <- matrix(1, nrow = N, ncol = N)
Xt <- t(X)
# A = X'X
A <- Xt %*% X
A
## [,1] [,2] [,3] [,4]
## [1,] 20 4170 72 43
## [2,] 4170 913300 15760 9440
## [3,] 72 15760 274 164
## [4,] 43 9440 164 101
# B = (X'X)^-1
B <- solve(A)
B
## [,1] [,2] [,3] [,4]
## [1,] 1.048179872 -0.0044610992 -0.046395432 0.0460385439
## [2,] -0.004461099 0.0001661512 -0.008049806 -0.0005591244
## [3,] -0.046395432 -0.0080498057 0.594059799 -0.1924815608
## [4,] 0.046038544 -0.0005591244 -0.192481561 0.3551034975
# C = X'Y
C <- Xt %*% Y
C
## [,1]
## [1,] 5670000
## [2,] 1228400000
## [3,] 21250000
## [4,] 12770000
# Coefficient vector: Beta = (X'X)^-1 X'Y
Beta <- B %*% C
Beta
## [,1]
## [1,] 65174.8751
## [2,] 607.2647
## [3,] 14337.7746
## [4,] 18648.5843
# The same model fitted with R's built-in lm()
modela <- lm(
  Harga_RumahY ~ Luas_Tanah_X1 + Kamar_TidurX2 + Kamar_MandiX3,
  data = harga
)
summary(modela)
##
## Call:
## lm(formula = Harga_RumahY ~ Luas_Tanah_X1 + Kamar_TidurX2 + Kamar_MandiX3,
## data = harga)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8720 -5420 -2457 3549 15207
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65174.9 7989.7 8.157 4.30e-07 ***
## Luas_Tanah_X1 607.3 100.6 6.037 1.73e-05 ***
## Kamar_TidurX2 14337.8 6014.9 2.384 0.02987 *
## Kamar_MandiX3 18648.6 4650.4 4.010 0.00101 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7804 on 16 degrees of freedom
## Multiple R-squared: 0.9812, Adjusted R-squared: 0.9777
## F-statistic: 278.5 on 3 and 16 DF, p-value: 5.147e-14
# Sums of squares in matrix form for the coefficient of determination
yty <- t(Y) %*% Y
# JKT: total sum of squares, Y'Y - (1/N) Y'JY
JKT <- yty - ((1 / N) * (t(Y) %*% J %*% Y))
JKT
## [,1]
## [1,] 5.1855e+10
# JKG: error sum of squares, Y'Y - Beta'X'Y
JKG <- yty - (t(Beta) %*% t(X) %*% Y)
JKG
## [,1]
## [1,] 974426997
# JKR: regression sum of squares, obtained as the difference
JKR <- JKT - JKG
JKR
## [,1]
## [1,] 50880573003
# ANOVA table from the fitted model
anova(modela)
## Analysis of Variance Table
##
## Response: Harga_RumahY
## Df Sum Sq Mean Sq F value Pr(>F)
## Luas_Tanah_X1 1 4.8681e+10 4.8681e+10 799.336 4.357e-15 ***
## Kamar_TidurX2 1 1.2203e+09 1.2203e+09 20.037 0.0003818 ***
## Kamar_MandiX3 1 9.7935e+08 9.7935e+08 16.081 0.0010104 **
## Residuals 16 9.7443e+08 6.0902e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Coefficient of determination: R^2 = (JKT - JKG) / JKT
r_sqr <- (JKT - JKG) / JKT
r_sqr
## [,1]
## [1,] 0.9812086
# F test
# Critical F value at the 5% level. The degrees of freedom are derived from
# the design matrix instead of being typed in: ncol(X) - 1 predictors in the
# numerator and N - ncol(X) in the denominator (3 and 16 for this data set).
df_regresi <- ncol(X) - 1
df_galat <- N - ncol(X)
qf(0.95, df_regresi, df_galat)
## [1] 3.238872
# t test for the intercept b0
# Error degrees of freedom: observations minus estimated coefficients
# (20 - 4 = 16 here). Derived from the data rather than hard-coded.
df_galat <- N - ncol(X)
# KTG: mean squared error, JKG over its degrees of freedom
KTG <- as.numeric(JKG / df_galat)
KTG
## [1] 60901687
# Estimated covariance matrix of the coefficients: KTG * (X'X)^-1
Sb <- KTG * solve(t(X) %*% X)
Sb
## [,1] [,2] [,3] [,4]
## [1,] 63835922.8 -271688.47 -2825560.1 2803825.00
## [2,] -271688.5 10118.89 -490246.7 -34051.62
## [3,] -2825560.1 -490246.75 36179244.1 -11722451.82
## [4,] 2803825.0 -34051.62 -11722451.8 21626402.16
thitung0 <- Beta[1, 1] / sqrt(Sb[1, 1])
thitung0
## [1] 8.157323
# Two-sided 5% critical value. It must use the error degrees of freedom of
# this four-coefficient model; the previous n - 2 (18 df, value 2.100922)
# only applies to a one-predictor regression.
ttabel <- qt(0.975, df_galat)
ttabel
## [1] 2.119905
# t test for b1 (coefficient of the first predictor)
# Error degrees of freedom: observations minus estimated coefficients
# (20 - 4 = 16 here). Derived from the data rather than hard-coded.
df_galat <- N - ncol(X)
# KTG: mean squared error, JKG over its degrees of freedom
KTG <- as.numeric(JKG / df_galat)
KTG
## [1] 60901687
# Estimated covariance matrix of the coefficients: KTG * (X'X)^-1
Sb <- KTG * solve(t(X) %*% X)
Sb
## [,1] [,2] [,3] [,4]
## [1,] 63835922.8 -271688.47 -2825560.1 2803825.00
## [2,] -271688.5 10118.89 -490246.7 -34051.62
## [3,] -2825560.1 -490246.75 36179244.1 -11722451.82
## [4,] 2803825.0 -34051.62 -11722451.8 21626402.16
thitung1 <- Beta[2, 1] / sqrt(Sb[2, 2])
thitung1
## [1] 6.036868
# Two-sided 5% critical value on the error degrees of freedom
ttabel <- qt(0.975, df_galat)
ttabel
## [1] 2.119905
# 95% confidence intervals for all coefficients
confint(modela, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 48237.3859 82112.3643
## Luas_Tanah_X1 394.0177 820.5116
## Kamar_TidurX2 1586.7171 27088.8321
## Kamar_MandiX3 8790.1353 28507.0334
# Assumption checks for modela
# The test call below is kept verbatim: its printed "data:" line echoes the
# argument expression exactly as written.
qqnorm(modela$residuals,ylab = "Residuals") # Normality: normal Q-Q plot of the residuals
qqline(modela$residuals)
# Lilliefors (Kolmogorov-Smirnov) normality test; lillie.test() comes from
# the nortest package loaded earlier in this script
lillie.test(modela$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: modela$residuals
## D = 0.16373, p-value = 0.172
# Homoscedasticity: residuals plotted against fitted values
fit_vals_a <- modela$fitted.values
res_vals_a <- modela$residuals
plot(
  x = fit_vals_a, y = res_vals_a,
  main = "Plot Uji Ragam Galat konstan",
  xlab = "Fitted Values", ylab = "Residuals"
)
library(lmtest)
bptest(modela)  # studentized Breusch-Pagan test
##
## studentized Breusch-Pagan test
##
## data: modela
## BP = 0.90428, df = 3, p-value = 0.8244
# No autocorrelation
# NOTE(review): this draws the same residual-vs-fitted scatter as above under
# a different title; a plot against observation order may have been intended
# - confirm.
plot(
  x = fit_vals_a, y = res_vals_a,
  main = "Plot Non-Autokorelasi",
  xlab = "Fitted Values", ylab = "Residuals"
)
library(lmtest)
dwtest(modela)
##
## Durbin-Watson test
##
## data: modela
## DW = 1.4549, p-value = 0.1034
## alternative hypothesis: true autocorrelation is greater than 0
# No multicollinearity: variance inflation factors of the predictors
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
vif(modela)
## Luas_Tanah_X1 Kamar_TidurX2 Kamar_MandiX3
## 7.286559 8.792085 3.036135