Library

library('olsrr')
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
library(MASS)
## Warning: package 'MASS' was built under R version 4.2.3
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:olsrr':
## 
##     cement
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## Loading required package: carData
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.2.3
## Loading required package: Matrix
## Loaded glmnet 4.1-8

INPUT DATA

datapsd <- read.csv("C:/Users/DELL/Documents/KULIAH/Semester 5/PSD/Tugas Minggu 7/online.csv")

Plot

#Eksplorasi Plot antara Y dengan X1
plot(datapsd$Marketing.Spend,datapsd$Profit)

#Eksplorasi Plot antara Y dengan X2
plot(datapsd$Administration,datapsd$Profit)

#Eksplorasi Plot antara Y dengan X3
plot(datapsd$Transport,datapsd$Profit)

Model Regresi Klasik

modelklasik <- lm(Profit~Marketing.Spend+Administration+Transport,data=datapsd)
s.modelklasik <- summary(modelklasik)
s.modelklasik
## 
## Call:
## lm(formula = Profit ~ Marketing.Spend + Administration + Transport, 
##     data = datapsd)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -49192  -4335    377   6508  33592 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.206e+04  9.201e+03   4.571 3.65e-05 ***
## Marketing.Spend 6.940e-01  5.964e-02  11.637 2.64e-15 ***
## Administration  3.631e-02  7.118e-02   0.510  0.61243    
## Transport       6.817e-02  2.179e-02   3.128  0.00305 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13090 on 46 degrees of freedom
## Multiple R-squared:  0.901,  Adjusted R-squared:  0.8946 
## F-statistic: 139.6 on 3 and 46 DF,  p-value: < 2.2e-16

Model regresi klasik yang diperoleh adalah : \[ Profit = 42060+ 0.6940 Marketing.Spend +0.0361Administration +0.06817Transport \] # R Squared dan RSE Model Klasik

# R squared
R2.modelklasik <- s.modelklasik$r.squared
R2.modelklasik
## [1] 0.9010442
# RSE
rse.modelklasik <- s.modelklasik$sigma
rse.modelklasik
## [1] 13086.14

Rsq Model klasik didapatkan $ R^2 =0.9010442$ dan nilai RSE $RSE = 13086.14 $

Plot Pencilan dan Laverage

ols_plot_resid_lev(modelklasik)

Amatan Berpengaruh

plot(modelklasik,which=5)
ols_plot_diagnostics(modelklasik)

Jarak Cook

influence.measures(modelklasik)
## Influence measures of
##   lm(formula = Profit ~ Marketing.Spend + Administration + Transport,      data = datapsd) :
## 
##       dfb.1_  dfb.Mr.S  dfb.Admn  dfb.Trns    dffit cov.r   cook.d    hat inf
## 1  -0.614352 -0.496627  0.444078  1.054899  1.23390 0.613 3.24e-01 0.1441   *
## 2  -0.019265  0.007061  0.012432  0.014021  0.03470 1.248 3.08e-04 0.1254    
## 3   0.063559  0.158484 -0.139703  0.037444  0.31355 1.144 2.47e-02 0.1102    
## 4  -0.008964  0.103282 -0.041485  0.044993  0.23131 1.115 1.35e-02 0.0756    
## 5  -0.031401 -0.043307  0.047448  0.001729 -0.07742 1.219 1.53e-03 0.1083    
## 6  -0.034434 -0.051028  0.057406 -0.013273 -0.11358 1.168 3.29e-03 0.0781    
## 7   0.008583  0.193321  0.004205 -0.176291  0.23111 1.256 1.36e-02 0.1533    
## 8   0.038105 -0.025812 -0.030499 -0.013041 -0.08061 1.151 1.66e-03 0.0598    
## 9   0.001542 -0.000495 -0.001400 -0.000648 -0.00277 1.157 1.96e-06 0.0562    
## 10 -0.015292 -0.029150  0.022167  0.004856 -0.04933 1.151 6.21e-04 0.0550    
## 11  0.103983  0.138905 -0.103642 -0.084066  0.21716 1.028 1.18e-02 0.0396    
## 12  0.168362  0.129267 -0.181312 -0.064454  0.24252 1.076 1.47e-02 0.0619    
## 13 -0.008878  0.034355  0.017943  0.007945  0.15526 1.028 6.03e-03 0.0243    
## 14 -0.025828  0.003495  0.032552  0.014977  0.08147 1.100 1.69e-03 0.0279    
## 15  0.173932 -0.117639 -0.180825  0.029887 -0.32934 1.015 2.68e-02 0.0638    
## 16 -0.396857 -1.941697  0.630547  1.154717 -2.11564 0.214 7.31e-01 0.1496   *
## 17 -0.003479 -0.028781  0.011997  0.049013  0.10472 1.081 2.78e-03 0.0258    
## 18  0.064751  0.018770 -0.070099 -0.047018 -0.11423 1.113 3.31e-03 0.0435    
## 19 -0.010317  0.001719  0.013403 -0.031593 -0.07831 1.107 1.56e-03 0.0307    
## 20  0.036521  0.346586  0.084858 -0.484077  0.57122 1.136 8.05e-02 0.1664    
## 21 -0.000601  0.006146  0.000433 -0.009952 -0.01526 1.134 5.95e-05 0.0378    
## 22  0.173961  0.134350 -0.193097 -0.166505 -0.26224 1.108 1.73e-02 0.0803    
## 23  0.027082  0.072213 -0.031647 -0.100814 -0.13659 1.102 4.73e-03 0.0441    
## 24 -0.013564  0.042134  0.012695 -0.058781 -0.08554 1.135 1.86e-03 0.0499    
## 25 -0.002468 -0.001836  0.002050  0.002097 -0.00320 1.161 2.61e-06 0.0597    
## 26 -0.009307  0.005204  0.038021 -0.030950  0.08880 1.111 2.01e-03 0.0355    
## 27  0.007007 -0.013094 -0.020480  0.023568 -0.04938 1.137 6.22e-04 0.0440    
## 28  0.136319  0.250214 -0.134912 -0.334175 -0.38630 1.036 3.68e-02 0.0845    
## 29 -0.013185 -0.002452  0.018433 -0.002444  0.02178 1.253 1.21e-04 0.1290    
## 30 -0.002875  0.001184  0.006207 -0.004492  0.01099 1.162 3.08e-05 0.0603    
## 31  0.045278  0.035272 -0.023401 -0.060199  0.07974 1.138 1.62e-03 0.0507    
## 32 -0.006264  0.003395  0.015776 -0.013719  0.02980 1.167 2.27e-04 0.0656    
## 33  0.026882  0.038688 -0.004210 -0.066408  0.07979 1.174 1.62e-03 0.0762    
## 34 -0.016780  0.008934  0.011671 -0.006509 -0.03000 1.125 2.30e-04 0.0315    
## 35 -0.034284 -0.037520  0.046875  0.027507  0.05890 1.198 8.86e-04 0.0915    
## 36  0.080307 -0.022180 -0.067557  0.012037  0.10336 1.140 2.72e-03 0.0570    
## 37 -0.029518 -0.173235  0.082968  0.117085  0.21230 1.098 1.13e-02 0.0627    
## 38  0.063650  0.002445 -0.060850 -0.005235  0.06889 1.279 1.21e-03 0.1486   *
## 39  0.234021 -0.094311 -0.198071  0.043316  0.29187 1.161 2.15e-02 0.1131    
## 40 -0.046604  0.010026  0.037595  0.000406 -0.05562 1.160 7.90e-04 0.0626    
## 41  0.000301 -0.001743  0.000438  0.000884  0.00242 1.145 1.50e-06 0.0460    
## 42  0.033652 -0.014326 -0.025027  0.003016  0.04345 1.166 4.82e-04 0.0657    
## 43 -0.006532  0.004641  0.003757 -0.000916 -0.01015 1.154 2.63e-05 0.0537    
## 44  0.052270 -0.055278  0.037499 -0.076230  0.20937 1.110 1.11e-02 0.0673    
## 45 -0.000989 -0.001354  0.003029 -0.001467  0.00524 1.212 7.02e-06 0.0990    
## 46  0.134007 -0.131559  0.054648 -0.160479  0.43937 1.005 4.72e-02 0.0876    
## 47  0.124085  0.741821 -0.216164 -0.651565 -0.80338 1.116 1.56e-01 0.2086    
## 48 -0.012963  0.041535 -0.036603  0.033899 -0.11558 1.197 3.40e-03 0.0974    
## 49 -0.375174 -0.040334  0.298868  0.180273 -0.38786 1.325 3.80e-02 0.2149   *
## 50 -0.295203  0.406739 -0.058158  0.134279 -0.85333 0.573 1.55e-01 0.0755   *
f<-qf(0.05,4,46)
di<-cooks.distance(modelklasik)
di
##            1            2            3            4            5            6 
## 3.239675e-01 3.075728e-04 2.468889e-02 1.347700e-02 1.530272e-03 3.285871e-03 
##            7            8            9           10           11           12 
## 1.356111e-02 1.656631e-03 1.958666e-06 6.214090e-04 1.175249e-02 1.473896e-02 
##           13           14           15           16           17           18 
## 6.030553e-03 1.687561e-03 2.677178e-02 7.306679e-01 2.777096e-03 3.313188e-03 
##           19           20           21           22           23           24 
## 1.560397e-03 8.046384e-02 5.952357e-05 1.727275e-02 4.725096e-03 1.864179e-03 
##           25           26           27           28           29           30 
## 2.612586e-06 2.005440e-03 6.224667e-04 3.681462e-02 1.211812e-04 3.083742e-05 
##           31           32           33           34           35           36 
## 1.620477e-03 2.268037e-04 1.624343e-03 2.298549e-04 8.858492e-04 2.719693e-03 
##           37           38           39           40           41           42 
## 1.134812e-02 1.212175e-03 2.145216e-02 7.897733e-04 1.501783e-06 4.822254e-04 
##           43           44           45           46           47           48 
## 2.632562e-05 1.105342e-02 7.019639e-06 4.722369e-02 1.564272e-01 3.404278e-03 
##           49           50 
## 3.798069e-02 1.553137e-01
data.frame(di, di>f)
##              di di...f
## 1  3.239675e-01   TRUE
## 2  3.075728e-04  FALSE
## 3  2.468889e-02  FALSE
## 4  1.347700e-02  FALSE
## 5  1.530272e-03  FALSE
## 6  3.285871e-03  FALSE
## 7  1.356111e-02  FALSE
## 8  1.656631e-03  FALSE
## 9  1.958666e-06  FALSE
## 10 6.214090e-04  FALSE
## 11 1.175249e-02  FALSE
## 12 1.473896e-02  FALSE
## 13 6.030553e-03  FALSE
## 14 1.687561e-03  FALSE
## 15 2.677178e-02  FALSE
## 16 7.306679e-01   TRUE
## 17 2.777096e-03  FALSE
## 18 3.313188e-03  FALSE
## 19 1.560397e-03  FALSE
## 20 8.046384e-02  FALSE
## 21 5.952357e-05  FALSE
## 22 1.727275e-02  FALSE
## 23 4.725096e-03  FALSE
## 24 1.864179e-03  FALSE
## 25 2.612586e-06  FALSE
## 26 2.005440e-03  FALSE
## 27 6.224667e-04  FALSE
## 28 3.681462e-02  FALSE
## 29 1.211812e-04  FALSE
## 30 3.083742e-05  FALSE
## 31 1.620477e-03  FALSE
## 32 2.268037e-04  FALSE
## 33 1.624343e-03  FALSE
## 34 2.298549e-04  FALSE
## 35 8.858492e-04  FALSE
## 36 2.719693e-03  FALSE
## 37 1.134812e-02  FALSE
## 38 1.212175e-03  FALSE
## 39 2.145216e-02  FALSE
## 40 7.897733e-04  FALSE
## 41 1.501783e-06  FALSE
## 42 4.822254e-04  FALSE
## 43 2.632562e-05  FALSE
## 44 1.105342e-02  FALSE
## 45 7.019639e-06  FALSE
## 46 4.722369e-02  FALSE
## 47 1.564272e-01  FALSE
## 48 3.404278e-03  FALSE
## 49 3.798069e-02  FALSE
## 50 1.553137e-01  FALSE
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
cooks_crit = f
model_cooks <- cooks.distance(modelklasik)
df <- data.frame(obs = names(model_cooks),
                 cooks = model_cooks)
ggplot(df, aes(y = cooks, x = obs)) +
  geom_point() +
  geom_hline(yintercept = cooks_crit, linetype="dashed") +
  labs(title = "Cook's Distance",
       subtitle = "Influential Observation ",
       x = "Observation Number",
       y = "Cook's")

Uji Asumsi

# 1 Nilai harapan sisaan sama dengan nol
t.test(modelklasik$residuals,
       mu = 0,
       conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  modelklasik$residuals
## t = 1.6786e-16, df = 49, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -3603.392  3603.392
## sample estimates:
##    mean of x 
## 3.009859e-13
# 2 Sisaan saling bebas 

dwtest(modelklasik)
## 
##  Durbin-Watson test
## 
## data:  modelklasik
## DW = 1.4638, p-value = 0.01622
## alternative hypothesis: true autocorrelation is greater than 0
#3 Ragam sisaan homogen

bptest(modelklasik)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelklasik
## BP = 1.68, df = 3, p-value = 0.6414

ASUMSI NORMALITAS SISAAN

shapiro.test(modelklasik$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  modelklasik$residuals
## W = 0.89081, p-value = 0.0002423

Multikol

vif(modelklasik)
## Marketing.Spend  Administration       Transport 
##        2.144275        1.138004        2.032512

Regresi Ridge

x <- data.matrix(datapsd[,c('Marketing.Spend','Administration','Transport')])
y <- datapsd$Profit
cv.r<-cv.glmnet(x,y,alpha=0)

best.lr<-cv.r$lambda.min
bestridge<-glmnet(x,y,alpha=0,lambda=best.lr)
coef(bestridge)
## 4 x 1 sparse Matrix of class "dgCMatrix"
##                           s0
## (Intercept)     4.181310e+04
## Marketing.Spend 6.053492e-01
## Administration  6.574391e-02
## Transport       8.337658e-02

Didapatkan Model regresi Ridge ialah : \[Profit = 41813 + 0.6053 Marketing.Spend + 0.0657Administration + 0.08337 Transport \]

R Squared dan RSE Model Ridge

# fungsi untuk cari rsquared
rsq<-function(bestmodel,bestlambda,x,y){
 #y duga
 y.duga <- predict(bestmodel, s = bestlambda, newx = x)

 #JKG dan JKT
 jkt <- sum((y - mean(y))^2)
 jkg <- sum((y.duga- y)^2)

#find R-Squared
rsq <- 1 - jkg/jkt
return(rsq) 
}

# R2 ridge
R2.modelridge <- rsq(bestridge,best.lr,x,y)
R2.modelridge
## [1] 0.8957015
#RSE ridge
train_predictionsr <- predict(bestridge,newx = x)
# Hitung residu (selisih antara prediksi dan nilai sebenarnya)
residualsr <- y - train_predictionsr
# Hitung varian residu
dfr <- length(y) - length(bestridge$beta)
residual_variancer <- sum(residualsr^2) / dfr
# Hitung RSE
rse.modelridge <- sqrt(residual_variancer)
rse.modelridge
## [1] 13291.07

Didapatkan nilai R2 pada model ridge $ R^2 = 0.8957014$ dan RSE model ridge didapatkan nilai $RSE =13291.07 $

Regresi Lasso

cv.l<-cv.glmnet(x,y,alpha=1)
best.ll<-cv.l$lambda.min
bestlasso<-glmnet(x,y,alpha=1,lambda=best.ll)
coef(bestlasso)
## 4 x 1 sparse Matrix of class "dgCMatrix"
##                           s0
## (Intercept)     4.947466e+04
## Marketing.Spend 6.839492e-01
## Administration  .           
## Transport       5.741564e-02

Didapatkan Model regresi lasso ialah : \[Profit = 48705 + 0.6889 Marketing.Spend + 0.05931 Transport \]

R Squared dan RSE Model Lasso

# R2 lasso
R2.modellasso <- rsq(bestlasso,best.ll,x,y)
R2.modellasso
## [1] 0.8986188
#RSElasso
train_predictionsLasso <- predict(bestlasso,newx = x)
# Hitung residu (selisih antara prediksi dan nilai sebenarnya)
residualsLasso <- y - train_predictionsLasso
# Hitung varian residu
dfLasso <- length(y) - length(bestlasso$beta)
residual_varianceLasso <- sum(residualsLasso^2) / dfLasso
# Hitung RSE
rse.modelLasso <- sqrt(residual_varianceLasso)
rse.modelLasso
## [1] 13103.87

Didapatkan nilai R2 pada model lasso $ R^2 = 0.8994172$ dan RSE model ridge didapatkan nilai $RSE =13052.17 $

Perbandingan Model Regresi

perbandingan <- matrix(c(R2.modelklasik, R2.modelridge, R2.modellasso, rse.modelklasik, rse.modelridge, rse.modelLasso),ncol=2,byrow = F)
row.names(perbandingan)<- c("Model klasik","model ridge","model lasso")
colnames(perbandingan) <- c("R squared","RSE")
perbandingan
##              R squared      RSE
## Model klasik 0.9010442 13086.14
## model ridge  0.8957015 13291.07
## model lasso  0.8986188 13103.87

Perbandingan dengan melihat nilai RSE yang memiliki nilai terendah, maka model yang memiliki nilai RSE terendah ialah model lasso dengan nilai $ RSE = 13052.17$ dapat dikatakan bahwa model terbaiknya adalah model lasso

Model Terbaik (Model Lasso)

coef(bestlasso)
## 4 x 1 sparse Matrix of class "dgCMatrix"
##                           s0
## (Intercept)     4.947466e+04
## Marketing.Spend 6.839492e-01
## Administration  .           
## Transport       5.741564e-02

Didapatkan Model regresi lasso ialah : \[Profit = 48705 + 0.6889 Marketing.Spend + 0.05931 Transport \]

Interpretasi Koefesien Model

Profit akan meningkat jika marketing.spend meningkat dan meningkatnya transport, untuk peubah administration tidaklah berpengaruh terhadap model.