Tugas Praktikum Analisis Statistika - Advanced Regression Analysis

Load Package

library(readxl)
library(ggplot2)
library(MASS)
library(olsrr)

## 
## Attaching package: 'olsrr'

## The following object is masked from 'package:MASS':
## 
##     cement

## The following object is masked from 'package:datasets':
## 
##     rivers

library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(car)

## Loading required package: carData

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(orcutt)
library(performance)
library(outliers)

Load Dataset

data_tv <- data.frame(read_excel("/Users/User/Library/CloudStorage/OneDrive-apps.ipb.ac.id/Kuliah/Semester 1/STA1511 Analisis Statistika/t5question.xlsx", sheet = "Data"))
head(data_tv)

##   CTRP      P       R
## 1  133 111600 1197576
## 2  111 104400 1053648
## 3  129  97200 1124172
## 4  117  79200  987144
## 5  130 126000 1283616
## 6  154 108000 1295100

Make Model

CTRP <- data_tv$CTRP
P <- data_tv$P
R <- data_tv$R
model_tv <- lm(R~CTRP+P)
s = summary(model_tv)
s$adj.r.squared

## [1] 0.8223261

summary(model_tv)

## 
## Call:
## lm(formula = R ~ CTRP + P)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -125839  -25848    5388   26146  180440 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.101e+04  9.096e+04   0.451    0.655    
## CTRP        5.932e+03  5.766e+02  10.287 4.02e-12 ***
## P           3.136e+00  3.032e-01  10.344 3.47e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 57550 on 35 degrees of freedom
## Multiple R-squared:  0.8319, Adjusted R-squared:  0.8223 
## F-statistic: 86.62 on 2 and 35 DF,  p-value: 2.793e-14

Linearity

Using Plot

par(mfrow = c(2,2))
plot(model_tv)

That is, the red line should be approximately horizontal at zero. The presence of a pattern may indicate a problem with some aspect of the linear model.

qqnorm(model_tv$residuals)
qqline(model_tv$residuals)

As we can see from this plot our errors follow the straight line decently so we will say this assumption is met and discuss possible issues. The points off the line tell us that we might have skewed data or, the most likely situation, we have extreme values in our data that don’t fit well into a normal distribution.

Using Test Statistics

resettest(R~CTRP+P)

## 
##  RESET test
## 
## data:  R ~ CTRP + P
## RESET = 0.69907, df1 = 2, df2 = 33, p-value = 0.5043

Linear.

Homoscedasticity

https://bookdown.org/jimr1603/Intermediate_R_-_R_for_Survey_Analysis/testing-regression-assumptions.html#testing-the-homoscedasticity-assumption

Breusch-Pagan Test

# dengan package olsrr
ols_test_breusch_pagan(model_tv)

## 
##  Breusch Pagan Test for Heteroskedasticity
##  -----------------------------------------
##  Ho: the variance is constant            
##  Ha: the variance is not constant        
## 
##             Data              
##  -----------------------------
##  Response : R 
##  Variables: fitted values of R 
## 
##         Test Summary         
##  ----------------------------
##  DF            =    1 
##  Chi2          =    0.2265347 
##  Prob > Chi2   =    0.6341053

# dengan lmtest
bptest(model_tv)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_tv
## BP = 1.0386, df = 2, p-value = 0.5949

White’s Test

bptest(model_tv, ~ CTRP*P + I(CTRP^2) + I(P^2), data = data_tv)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_tv
## BP = 2.1805, df = 5, p-value = 0.8237

Kesimpulan: ragam residu sama dan konstan.

Normality

Using Plot

hist(model_tv$residuals)

ols_plot_resid_hist(model_tv)

boxplot(model_tv$residuals)

Using Shapiro Wilk Test Statistics

# package olsrr
ols_test_normality(model_tv$residuals)

## -----------------------------------------------
##        Test             Statistic       pvalue  
## -----------------------------------------------
## Shapiro-Wilk              0.9564         0.1451 
## Kolmogorov-Smirnov        0.0937         0.8615 
## Cramer-von Mises          3.4035         0.0000 
## Anderson-Darling          0.5069         0.1888 
## -----------------------------------------------

# shapiro method
shapiro.test(model_tv$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model_tv$residuals
## W = 0.95643, p-value = 0.1451

Kesimpulan: Normal

Multicollinearity

Using VIF

vif(model_tv)

##     CTRP        P 
## 1.055066 1.055066

Plot Correlation

model_corr_matrix <- cor(data_tv %>% 
                            select(R, CTRP, P),
                          use = "pairwise.complete.obs")
model_corr_matrix

##              R       CTRP          P
## R    1.0000000  0.5640285  0.5689881
## CTRP 0.5640285  1.0000000 -0.2284556
## P    0.5689881 -0.2284556  1.0000000

corrplot::corrplot(model_corr_matrix)

Autocorrelation

# using Durbin Watson Test
durbinWatsonTest(model_tv)

##  lag Autocorrelation D-W Statistic p-value
##    1       0.2521297      1.212171   0.006
##  Alternative hypothesis: rho != 0

lmtest::dwtest(R ~ CTRP + P, data = data_tv)

## 
##  Durbin-Watson test
## 
## data:  R ~ CTRP + P
## DW = 1.2122, p-value = 0.005434
## alternative hypothesis: true autocorrelation is greater than 0

bgtest(R~CTRP+P)

## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  R ~ CTRP + P
## LM test = 3.7217, df = 1, p-value = 0.05371

http://www.sthda.com/english/articles/39-regression-model-diagnostics/161-linear-regression-assumptions-and-diagnostics-in-r-essentials/

Handling Autocorrelation

par(mfrow=c(2,2))
acf(model_tv$residuals)
pacf(model_tv$residuals)

model_co <- cochrane.orcutt(model_tv)
model_co

## Cochrane-orcutt estimation for first order autocorrelation 
##  
## Call:
## lm(formula = R ~ CTRP + P)
## 
##  number of interaction: 7
##  rho 0.380978
## 
## Durbin-Watson statistic 
## (original):    1.21217 , p-value: 5.434e-03
## (transformed): 1.75768 , p-value: 2.608e-01
##  
##  coefficients: 
## (Intercept)        CTRP           P 
## 7777.801248 6030.550491    3.309282

optimum rho = 0.380978

rho <- model_co$rho
rho

## [1] 0.380978

nrow(data_tv)

## [1] 38

data_new <- data.frame(Rtrans = c(1:nrow(data_tv)-1),
                       Ptrans = c(1:nrow(data_tv)-1),
                       CTRPtrans = c(1:nrow(data_tv)-1))
for(i in 1:nrow(data_new)){
  data_new$Rtrans[i]=data_tv$R[i+1]-rho*data_tv$R[i]
  data_new$Ptrans[i]=data_tv$P[i+1]-rho*data_tv$P[i]
  data_new$CTRPtrans[i]=data_tv$CTRP[i+1]-rho*data_tv$CTRP[i]
  data_new
}
data_new

##       Rtrans    Ptrans CTRPtrans
## 1   597397.9  61882.85  60.32992
## 2   722755.3  57425.90  86.71144
## 3   558859.2  42168.94  67.85384
## 4   907535.8  95826.54  85.42557
## 5   806070.5  59996.77 104.47286
## 6   914039.4 106454.37  90.32939
## 7   386210.8  48167.65  33.23428
## 8   920591.8 129425.90  83.71198
## 9   580247.4  11138.52  86.04459
## 10  864273.3 104398.06  91.09188
## 11  580933.2  82453.73  65.28210
## 12  801837.9 125653.73  69.66362
## 13  726257.6 112795.48  58.18753
## 14  779515.9  64623.96  90.14024
## 15  827547.3  96853.73  94.85384
## 16  826630.8  66167.65  98.13917
## 17  599566.2 111768.29  37.71036
## 18  764210.0 105253.08  67.42611
## 19  875161.6  41310.04 116.37829
## 20  657282.0  97025.90  59.56743
## 21  752192.5  63082.21  79.66362
## 22  679455.1  71311.33  82.37775
## 23  836702.3 107311.33  73.47286
## 24  635217.8  39596.12  81.13970
## 25  478580.5  85368.94  48.23481
## 26 1012944.2 162168.29  87.04513
## 27  510075.5  14051.79  90.75873
## 28  790594.2  79540.46  84.42503
## 29  771329.1  74911.33  76.80601
## 30  595035.5  84339.81  47.85384
## 31  825019.1  51425.25  96.04513
## 32  937963.4 109197.42  94.32992
## 33  758206.2  69767.65  93.75819
## 34  563422.8  59996.77  65.23428
## 35 1012856.9 153254.37  73.52068
## 36  798855.7 102337.87  82.28264
## 37  947628.6 105595.48  68.23481
## 38        NA        NA        NA

R.trans <- data_new$Rtrans
P.trans <- data_new$Ptrans
CTRP.trans <- data_new$CTRPtrans
model.trans_co <- lm(R.trans~CTRP.trans+P.trans)
summary(model.trans_co)

## 
## Call:
## lm(formula = R.trans ~ CTRP.trans + P.trans)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -99627 -35358   4984  22532 181875 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.815e+03  4.987e+04   0.097    0.924    
## CTRP.trans  6.031e+03  5.137e+02  11.740 1.65e-13 ***
## P.trans     3.309e+00  2.736e-01  12.093 7.26e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 55340 on 34 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.8773, Adjusted R-squared:  0.8701 
## F-statistic: 121.5 on 2 and 34 DF,  p-value: 3.247e-16

Test Autocorrelation

dwtest(model.trans_co)

## 
##  Durbin-Watson test
## 
## data:  model.trans_co
## DW = 1.7577, p-value = 0.2608
## alternative hypothesis: true autocorrelation is greater than 0

bgtest(R.trans~CTRP.trans+P.trans, order=1)

## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  R.trans ~ CTRP.trans + P.trans
## LM test = 0.093627, df = 1, p-value = 0.7596

durbinWatsonTest(model.trans_co)

##  lag Autocorrelation D-W Statistic p-value
##    1     -0.04045917      1.757681   0.514
##  Alternative hypothesis: rho != 0

https://rstudio-pubs-static.s3.amazonaws.com/866921_78f732ca0f884e6ca5c4d94cfb4e2ae0.html

Outlier Detection

# for CTRP
testCTRP1 <- grubbs.test(data_tv$CTRP)
testCTRP1

## 
##  Grubbs test for one outlier
## 
## data:  data_tv$CTRP
## G = 2.12673, U = 0.87445, p-value = 0.5522
## alternative hypothesis: lowest value 90 is an outlier

testCTRP2 <- grubbs.test(data_tv$CTRP, opposite = T)
testCTRP2

## 
##  Grubbs test for one outlier
## 
## data:  data_tv$CTRP
## G = 1.78945, U = 0.91112, p-value = 1
## alternative hypothesis: highest value 156 is an outlier

# for P
testP1 <- grubbs.test(data_tv$P)
testP1

## 
##  Grubbs test for one outlier
## 
## data:  data_tv$P
## G = 2.40296, U = 0.83972, p-value = 0.2423
## alternative hypothesis: highest value 208800 is an outlier

testP2 <- grubbs.test(data_tv$P, opposite = T)
testP2

## 
##  Grubbs test for one outlier
## 
## data:  data_tv$P
## G = 1.75271, U = 0.91473, p-value = 1
## alternative hypothesis: lowest value 75600 is an outlier

# for R
testR1 <- grubbs.test(data_tv$R)
testR1

## 
##  Grubbs test for one outlier
## 
## data:  data_tv$R
## G = 2.16796, U = 0.86954, p-value = 0.4917
## alternative hypothesis: lowest value 904776 is an outlier

testR2 <- grubbs.test(data_tv$R, opposite = T)
testR2

## 
##  Grubbs test for one outlier
## 
## data:  data_tv$R
## G = 1.87974, U = 0.90192, p-value = 1
## alternative hypothesis: highest value 1457400 is an outlier

VIF

summary(lm(CTRP~P))

## 
## Call:
## lm(formula = CTRP ~ P)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.131  -6.563   0.545   7.558  26.869 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.417e+02  1.156e+01  12.253 2.09e-14 ***
## P           -1.201e-04  8.532e-05  -1.408    0.168    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.63 on 36 degrees of freedom
## Multiple R-squared:  0.05219,    Adjusted R-squared:  0.02586 
## F-statistic: 1.982 on 1 and 36 DF,  p-value: 0.1677

summary(lm(P~CTRP))

## 
## Call:
## lm(formula = P ~ CTRP)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -56421 -20700  -2762  24036  76221 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 186456.7    39172.1   4.760 3.12e-05 ***
## CTRP          -434.5      308.6  -1.408    0.168    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31640 on 36 degrees of freedom
## Multiple R-squared:  0.05219,    Adjusted R-squared:  0.02586 
## F-statistic: 1.982 on 1 and 36 DF,  p-value: 0.1677

White Test

resp <- model_tv$residuals^2
CTRP2 <- CTRP^2
P2 <- P^2
Comb <- CTRP*P
summary(lm(resp~CTRP+P+CTRP2+P2+Comb))

## 
## Call:
## lm(formula = resp ~ CTRP + P + CTRP2 + P2 + Comb)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -4.871e+09 -2.825e+09 -1.291e+09  1.374e+07  2.756e+10 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.919e+10  7.496e+10  -0.790    0.436
## CTRP         8.409e+08  8.949e+08   0.940    0.354
## P            1.656e+05  4.581e+05   0.361    0.720
## CTRP2       -2.894e+06  3.148e+06  -0.920    0.365
## P2          -1.966e-02  9.336e-01  -0.021    0.983
## Comb        -1.118e+03  2.753e+03  -0.406    0.687
## 
## Residual standard error: 6.242e+09 on 32 degrees of freedom
## Multiple R-squared:  0.05738,    Adjusted R-squared:  -0.0899 
## F-statistic: 0.3896 on 5 and 32 DF,  p-value: 0.8522

# durbin watson autocorrelation
durbinWatsonTest(model_tv)

##  lag Autocorrelation D-W Statistic p-value
##    1       0.2521297      1.212171   0.018
##  Alternative hypothesis: rho != 0

x <- c(2,7,5,8,4)
y <- c(3,45,22,58,18)
plot(x,y)

a <- c(2,7,5,8,4)
b <- c(3,18,12,17,11)
plot(a,b)

summary(lm(b~a+a))

## 
## Call:
## lm(formula = b ~ a + a)
## 
## Residuals:
##       1       2       3       4       5 
## -1.5088  1.4737  0.2807 -1.9298  1.6842 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -0.2982     2.2633  -0.132  0.90350   
## a             2.4035     0.4026   5.970  0.00941 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.922 on 3 degrees of freedom
## Multiple R-squared:  0.9224, Adjusted R-squared:  0.8965 
## F-statistic: 35.64 on 1 and 3 DF,  p-value: 0.009406

1-pt(5.04/4.3, 47) 1.91/0.35 5.04/4.3

Tugas Praktikum Analisis Statistika - Advanced Regression Analysis

2022-09-16

Load Package

Load Dataset

Make Model

Linearity

Using Plot

Using Test Statistics

Homoscedasticity

Breusch-Pagan Test

White’s Test

Normality

Using Plot

Using Shapiro Wilk Test Statistics

Multicollinearity

Using VIF

Plot Correlation

Autocorrelation

Handling Autocorrelation

Test Autocorrelation

Outlier Detection

VIF

White Test