library('olsrr')
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
library(MASS)
## Warning: package 'MASS' was built under R version 4.2.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:olsrr':
##
## cement
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.2.3
## Loading required package: Matrix
## Loaded glmnet 4.1-8
datapsd <- read.csv("C:/Users/DELL/Documents/KULIAH/Semester 5/PSD/Tugas Minggu 7/online.csv")
#Eksplorasi Plot antara Y dengan X1
plot(datapsd$Marketing.Spend,datapsd$Profit)
#Eksplorasi Plot antara Y dengan X2
plot(datapsd$Administration,datapsd$Profit)
#Eksplorasi Plot antara Y dengan X3
plot(datapsd$Transport,datapsd$Profit)
modelklasik <- lm(Profit~Marketing.Spend+Administration+Transport,data=datapsd)
s.modelklasik <- summary(modelklasik)
s.modelklasik
##
## Call:
## lm(formula = Profit ~ Marketing.Spend + Administration + Transport,
## data = datapsd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49192 -4335 377 6508 33592
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.206e+04 9.201e+03 4.571 3.65e-05 ***
## Marketing.Spend 6.940e-01 5.964e-02 11.637 2.64e-15 ***
## Administration 3.631e-02 7.118e-02 0.510 0.61243
## Transport 6.817e-02 2.179e-02 3.128 0.00305 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13090 on 46 degrees of freedom
## Multiple R-squared: 0.901, Adjusted R-squared: 0.8946
## F-statistic: 139.6 on 3 and 46 DF, p-value: < 2.2e-16
Model regresi klasik yang diperoleh adalah : \[ Profit = 42060+ 0.6940 Marketing.Spend +0.0361Administration +0.06817Transport \] # R Squared dan RSE Model Klasik
# R squared
R2.modelklasik <- s.modelklasik$r.squared
R2.modelklasik
## [1] 0.9010442
# RSE
rse.modelklasik <- s.modelklasik$sigma
rse.modelklasik
## [1] 13086.14
Rsq Model klasik didapatkan $ R^2 =0.9010442$ dan nilai RSE $RSE = 13086.14 $
ols_plot_resid_lev(modelklasik)
plot(modelklasik,which=5)
ols_plot_diagnostics(modelklasik)
influence.measures(modelklasik)
## Influence measures of
## lm(formula = Profit ~ Marketing.Spend + Administration + Transport, data = datapsd) :
##
## dfb.1_ dfb.Mr.S dfb.Admn dfb.Trns dffit cov.r cook.d hat inf
## 1 -0.614352 -0.496627 0.444078 1.054899 1.23390 0.613 3.24e-01 0.1441 *
## 2 -0.019265 0.007061 0.012432 0.014021 0.03470 1.248 3.08e-04 0.1254
## 3 0.063559 0.158484 -0.139703 0.037444 0.31355 1.144 2.47e-02 0.1102
## 4 -0.008964 0.103282 -0.041485 0.044993 0.23131 1.115 1.35e-02 0.0756
## 5 -0.031401 -0.043307 0.047448 0.001729 -0.07742 1.219 1.53e-03 0.1083
## 6 -0.034434 -0.051028 0.057406 -0.013273 -0.11358 1.168 3.29e-03 0.0781
## 7 0.008583 0.193321 0.004205 -0.176291 0.23111 1.256 1.36e-02 0.1533
## 8 0.038105 -0.025812 -0.030499 -0.013041 -0.08061 1.151 1.66e-03 0.0598
## 9 0.001542 -0.000495 -0.001400 -0.000648 -0.00277 1.157 1.96e-06 0.0562
## 10 -0.015292 -0.029150 0.022167 0.004856 -0.04933 1.151 6.21e-04 0.0550
## 11 0.103983 0.138905 -0.103642 -0.084066 0.21716 1.028 1.18e-02 0.0396
## 12 0.168362 0.129267 -0.181312 -0.064454 0.24252 1.076 1.47e-02 0.0619
## 13 -0.008878 0.034355 0.017943 0.007945 0.15526 1.028 6.03e-03 0.0243
## 14 -0.025828 0.003495 0.032552 0.014977 0.08147 1.100 1.69e-03 0.0279
## 15 0.173932 -0.117639 -0.180825 0.029887 -0.32934 1.015 2.68e-02 0.0638
## 16 -0.396857 -1.941697 0.630547 1.154717 -2.11564 0.214 7.31e-01 0.1496 *
## 17 -0.003479 -0.028781 0.011997 0.049013 0.10472 1.081 2.78e-03 0.0258
## 18 0.064751 0.018770 -0.070099 -0.047018 -0.11423 1.113 3.31e-03 0.0435
## 19 -0.010317 0.001719 0.013403 -0.031593 -0.07831 1.107 1.56e-03 0.0307
## 20 0.036521 0.346586 0.084858 -0.484077 0.57122 1.136 8.05e-02 0.1664
## 21 -0.000601 0.006146 0.000433 -0.009952 -0.01526 1.134 5.95e-05 0.0378
## 22 0.173961 0.134350 -0.193097 -0.166505 -0.26224 1.108 1.73e-02 0.0803
## 23 0.027082 0.072213 -0.031647 -0.100814 -0.13659 1.102 4.73e-03 0.0441
## 24 -0.013564 0.042134 0.012695 -0.058781 -0.08554 1.135 1.86e-03 0.0499
## 25 -0.002468 -0.001836 0.002050 0.002097 -0.00320 1.161 2.61e-06 0.0597
## 26 -0.009307 0.005204 0.038021 -0.030950 0.08880 1.111 2.01e-03 0.0355
## 27 0.007007 -0.013094 -0.020480 0.023568 -0.04938 1.137 6.22e-04 0.0440
## 28 0.136319 0.250214 -0.134912 -0.334175 -0.38630 1.036 3.68e-02 0.0845
## 29 -0.013185 -0.002452 0.018433 -0.002444 0.02178 1.253 1.21e-04 0.1290
## 30 -0.002875 0.001184 0.006207 -0.004492 0.01099 1.162 3.08e-05 0.0603
## 31 0.045278 0.035272 -0.023401 -0.060199 0.07974 1.138 1.62e-03 0.0507
## 32 -0.006264 0.003395 0.015776 -0.013719 0.02980 1.167 2.27e-04 0.0656
## 33 0.026882 0.038688 -0.004210 -0.066408 0.07979 1.174 1.62e-03 0.0762
## 34 -0.016780 0.008934 0.011671 -0.006509 -0.03000 1.125 2.30e-04 0.0315
## 35 -0.034284 -0.037520 0.046875 0.027507 0.05890 1.198 8.86e-04 0.0915
## 36 0.080307 -0.022180 -0.067557 0.012037 0.10336 1.140 2.72e-03 0.0570
## 37 -0.029518 -0.173235 0.082968 0.117085 0.21230 1.098 1.13e-02 0.0627
## 38 0.063650 0.002445 -0.060850 -0.005235 0.06889 1.279 1.21e-03 0.1486 *
## 39 0.234021 -0.094311 -0.198071 0.043316 0.29187 1.161 2.15e-02 0.1131
## 40 -0.046604 0.010026 0.037595 0.000406 -0.05562 1.160 7.90e-04 0.0626
## 41 0.000301 -0.001743 0.000438 0.000884 0.00242 1.145 1.50e-06 0.0460
## 42 0.033652 -0.014326 -0.025027 0.003016 0.04345 1.166 4.82e-04 0.0657
## 43 -0.006532 0.004641 0.003757 -0.000916 -0.01015 1.154 2.63e-05 0.0537
## 44 0.052270 -0.055278 0.037499 -0.076230 0.20937 1.110 1.11e-02 0.0673
## 45 -0.000989 -0.001354 0.003029 -0.001467 0.00524 1.212 7.02e-06 0.0990
## 46 0.134007 -0.131559 0.054648 -0.160479 0.43937 1.005 4.72e-02 0.0876
## 47 0.124085 0.741821 -0.216164 -0.651565 -0.80338 1.116 1.56e-01 0.2086
## 48 -0.012963 0.041535 -0.036603 0.033899 -0.11558 1.197 3.40e-03 0.0974
## 49 -0.375174 -0.040334 0.298868 0.180273 -0.38786 1.325 3.80e-02 0.2149 *
## 50 -0.295203 0.406739 -0.058158 0.134279 -0.85333 0.573 1.55e-01 0.0755 *
f<-qf(0.05,4,46)
di<-cooks.distance(modelklasik)
di
## 1 2 3 4 5 6
## 3.239675e-01 3.075728e-04 2.468889e-02 1.347700e-02 1.530272e-03 3.285871e-03
## 7 8 9 10 11 12
## 1.356111e-02 1.656631e-03 1.958666e-06 6.214090e-04 1.175249e-02 1.473896e-02
## 13 14 15 16 17 18
## 6.030553e-03 1.687561e-03 2.677178e-02 7.306679e-01 2.777096e-03 3.313188e-03
## 19 20 21 22 23 24
## 1.560397e-03 8.046384e-02 5.952357e-05 1.727275e-02 4.725096e-03 1.864179e-03
## 25 26 27 28 29 30
## 2.612586e-06 2.005440e-03 6.224667e-04 3.681462e-02 1.211812e-04 3.083742e-05
## 31 32 33 34 35 36
## 1.620477e-03 2.268037e-04 1.624343e-03 2.298549e-04 8.858492e-04 2.719693e-03
## 37 38 39 40 41 42
## 1.134812e-02 1.212175e-03 2.145216e-02 7.897733e-04 1.501783e-06 4.822254e-04
## 43 44 45 46 47 48
## 2.632562e-05 1.105342e-02 7.019639e-06 4.722369e-02 1.564272e-01 3.404278e-03
## 49 50
## 3.798069e-02 1.553137e-01
data.frame(di, di>f)
## di di...f
## 1 3.239675e-01 TRUE
## 2 3.075728e-04 FALSE
## 3 2.468889e-02 FALSE
## 4 1.347700e-02 FALSE
## 5 1.530272e-03 FALSE
## 6 3.285871e-03 FALSE
## 7 1.356111e-02 FALSE
## 8 1.656631e-03 FALSE
## 9 1.958666e-06 FALSE
## 10 6.214090e-04 FALSE
## 11 1.175249e-02 FALSE
## 12 1.473896e-02 FALSE
## 13 6.030553e-03 FALSE
## 14 1.687561e-03 FALSE
## 15 2.677178e-02 FALSE
## 16 7.306679e-01 TRUE
## 17 2.777096e-03 FALSE
## 18 3.313188e-03 FALSE
## 19 1.560397e-03 FALSE
## 20 8.046384e-02 FALSE
## 21 5.952357e-05 FALSE
## 22 1.727275e-02 FALSE
## 23 4.725096e-03 FALSE
## 24 1.864179e-03 FALSE
## 25 2.612586e-06 FALSE
## 26 2.005440e-03 FALSE
## 27 6.224667e-04 FALSE
## 28 3.681462e-02 FALSE
## 29 1.211812e-04 FALSE
## 30 3.083742e-05 FALSE
## 31 1.620477e-03 FALSE
## 32 2.268037e-04 FALSE
## 33 1.624343e-03 FALSE
## 34 2.298549e-04 FALSE
## 35 8.858492e-04 FALSE
## 36 2.719693e-03 FALSE
## 37 1.134812e-02 FALSE
## 38 1.212175e-03 FALSE
## 39 2.145216e-02 FALSE
## 40 7.897733e-04 FALSE
## 41 1.501783e-06 FALSE
## 42 4.822254e-04 FALSE
## 43 2.632562e-05 FALSE
## 44 1.105342e-02 FALSE
## 45 7.019639e-06 FALSE
## 46 4.722369e-02 FALSE
## 47 1.564272e-01 FALSE
## 48 3.404278e-03 FALSE
## 49 3.798069e-02 FALSE
## 50 1.553137e-01 FALSE
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
cooks_crit = f
model_cooks <- cooks.distance(modelklasik)
df <- data.frame(obs = names(model_cooks),
cooks = model_cooks)
ggplot(df, aes(y = cooks, x = obs)) +
geom_point() +
geom_hline(yintercept = cooks_crit, linetype="dashed") +
labs(title = "Cook's Distance",
subtitle = "Influential Observation ",
x = "Observation Number",
y = "Cook's")
# 1 Nilai harapan sisaan sama dengan nol
t.test(modelklasik$residuals,
mu = 0,
conf.level = 0.95)
##
## One Sample t-test
##
## data: modelklasik$residuals
## t = 1.6786e-16, df = 49, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -3603.392 3603.392
## sample estimates:
## mean of x
## 3.009859e-13
# 2 Sisaan saling bebas
dwtest(modelklasik)
##
## Durbin-Watson test
##
## data: modelklasik
## DW = 1.4638, p-value = 0.01622
## alternative hypothesis: true autocorrelation is greater than 0
#3 Ragam sisaan homogen
bptest(modelklasik)
##
## studentized Breusch-Pagan test
##
## data: modelklasik
## BP = 1.68, df = 3, p-value = 0.6414
shapiro.test(modelklasik$residuals)
##
## Shapiro-Wilk normality test
##
## data: modelklasik$residuals
## W = 0.89081, p-value = 0.0002423
vif(modelklasik)
## Marketing.Spend Administration Transport
## 2.144275 1.138004 2.032512
x <- data.matrix(datapsd[,c('Marketing.Spend','Administration','Transport')])
y <- datapsd$Profit
cv.r<-cv.glmnet(x,y,alpha=0)
best.lr<-cv.r$lambda.min
bestridge<-glmnet(x,y,alpha=0,lambda=best.lr)
coef(bestridge)
## 4 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 4.181310e+04
## Marketing.Spend 6.053492e-01
## Administration 6.574391e-02
## Transport 8.337658e-02
Didapatkan Model regresi Ridge ialah : \[Profit = 41813 + 0.6053 Marketing.Spend + 0.0657Administration + 0.08337 Transport \]
# fungsi untuk cari rsquared
rsq<-function(bestmodel,bestlambda,x,y){
#y duga
y.duga <- predict(bestmodel, s = bestlambda, newx = x)
#JKG dan JKT
jkt <- sum((y - mean(y))^2)
jkg <- sum((y.duga- y)^2)
#find R-Squared
rsq <- 1 - jkg/jkt
return(rsq)
}
# R2 ridge
R2.modelridge <- rsq(bestridge,best.lr,x,y)
R2.modelridge
## [1] 0.8957015
#RSE ridge
train_predictionsr <- predict(bestridge,newx = x)
# Hitung residu (selisih antara prediksi dan nilai sebenarnya)
residualsr <- y - train_predictionsr
# Hitung varian residu
dfr <- length(y) - length(bestridge$beta)
residual_variancer <- sum(residualsr^2) / dfr
# Hitung RSE
rse.modelridge <- sqrt(residual_variancer)
rse.modelridge
## [1] 13291.07
Didapatkan nilai R2 pada model ridge $ R^2 = 0.8957014$ dan RSE model ridge didapatkan nilai $RSE =13291.07 $
cv.l<-cv.glmnet(x,y,alpha=1)
best.ll<-cv.l$lambda.min
bestlasso<-glmnet(x,y,alpha=1,lambda=best.ll)
coef(bestlasso)
## 4 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 4.947466e+04
## Marketing.Spend 6.839492e-01
## Administration .
## Transport 5.741564e-02
Didapatkan Model regresi lasso ialah : \[Profit = 48705 + 0.6889 Marketing.Spend + 0.05931 Transport \]
# R2 lasso
R2.modellasso <- rsq(bestlasso,best.ll,x,y)
R2.modellasso
## [1] 0.8986188
#RSElasso
train_predictionsLasso <- predict(bestlasso,newx = x)
# Hitung residu (selisih antara prediksi dan nilai sebenarnya)
residualsLasso <- y - train_predictionsLasso
# Hitung varian residu
dfLasso <- length(y) - length(bestlasso$beta)
residual_varianceLasso <- sum(residualsLasso^2) / dfLasso
# Hitung RSE
rse.modelLasso <- sqrt(residual_varianceLasso)
rse.modelLasso
## [1] 13103.87
Didapatkan nilai R2 pada model lasso $ R^2 = 0.8994172$ dan RSE model ridge didapatkan nilai $RSE =13052.17 $
perbandingan <- matrix(c(R2.modelklasik, R2.modelridge, R2.modellasso, rse.modelklasik, rse.modelridge, rse.modelLasso),ncol=2,byrow = F)
row.names(perbandingan)<- c("Model klasik","model ridge","model lasso")
colnames(perbandingan) <- c("R squared","RSE")
perbandingan
## R squared RSE
## Model klasik 0.9010442 13086.14
## model ridge 0.8957015 13291.07
## model lasso 0.8986188 13103.87
Perbandingan dengan melihat nilai RSE yang memiliki nilai terendah, maka model yang memiliki nilai RSE terendah ialah model lasso dengan nilai $ RSE = 13052.17$ dapat dikatakan bahwa model terbaiknya adalah model lasso
coef(bestlasso)
## 4 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 4.947466e+04
## Marketing.Spend 6.839492e-01
## Administration .
## Transport 5.741564e-02
Didapatkan Model regresi lasso ialah : \[Profit = 48705 + 0.6889 Marketing.Spend + 0.05931 Transport \]
Profit akan meningkat jika marketing.spend meningkat dan meningkatnya transport, untuk peubah administration tidaklah berpengaruh terhadap model.