library(readxl)
library(ggplot2)
library(MASS)
library(olsrr)
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:MASS':
##
## cement
## The following object is masked from 'package:datasets':
##
## rivers
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(orcutt)
library(performance)
library(outliers)
data_tv <- data.frame(read_excel("/Users/User/Library/CloudStorage/OneDrive-apps.ipb.ac.id/Kuliah/Semester 1/STA1511 Analisis Statistika/t5question.xlsx", sheet = "Data"))
head(data_tv)
## CTRP P R
## 1 133 111600 1197576
## 2 111 104400 1053648
## 3 129 97200 1124172
## 4 117 79200 987144
## 5 130 126000 1283616
## 6 154 108000 1295100
CTRP <- data_tv$CTRP
P <- data_tv$P
R <- data_tv$R
model_tv <- lm(R~CTRP+P)
s = summary(model_tv)
s$adj.r.squared
## [1] 0.8223261
summary(model_tv)
##
## Call:
## lm(formula = R ~ CTRP + P)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125839 -25848 5388 26146 180440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.101e+04 9.096e+04 0.451 0.655
## CTRP 5.932e+03 5.766e+02 10.287 4.02e-12 ***
## P 3.136e+00 3.032e-01 10.344 3.47e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 57550 on 35 degrees of freedom
## Multiple R-squared: 0.8319, Adjusted R-squared: 0.8223
## F-statistic: 86.62 on 2 and 35 DF, p-value: 2.793e-14
par(mfrow = c(2,2))
plot(model_tv)
That is, the red line should be approximately horizontal at zero. The
presence of a pattern may indicate a problem with some aspect of the
linear model.
qqnorm(model_tv$residuals)
qqline(model_tv$residuals)
As we can see from this plot our errors follow the straight line
decently so we will say this assumption is met and discuss possible
issues. The points off the line tell us that we might have skewed data
or, the most likely situation, we have extreme values in our data that
don’t fit well into a normal distribution.
resettest(R~CTRP+P)
##
## RESET test
##
## data: R ~ CTRP + P
## RESET = 0.69907, df1 = 2, df2 = 33, p-value = 0.5043
Linear.
# dengan package olsrr
ols_test_breusch_pagan(model_tv)
##
## Breusch Pagan Test for Heteroskedasticity
## -----------------------------------------
## Ho: the variance is constant
## Ha: the variance is not constant
##
## Data
## -----------------------------
## Response : R
## Variables: fitted values of R
##
## Test Summary
## ----------------------------
## DF = 1
## Chi2 = 0.2265347
## Prob > Chi2 = 0.6341053
# dengan lmtest
bptest(model_tv)
##
## studentized Breusch-Pagan test
##
## data: model_tv
## BP = 1.0386, df = 2, p-value = 0.5949
bptest(model_tv, ~ CTRP*P + I(CTRP^2) + I(P^2), data = data_tv)
##
## studentized Breusch-Pagan test
##
## data: model_tv
## BP = 2.1805, df = 5, p-value = 0.8237
Kesimpulan: ragam residu sama dan konstan.
hist(model_tv$residuals)
ols_plot_resid_hist(model_tv)
boxplot(model_tv$residuals)
# package olsrr
ols_test_normality(model_tv$residuals)
## -----------------------------------------------
## Test Statistic pvalue
## -----------------------------------------------
## Shapiro-Wilk 0.9564 0.1451
## Kolmogorov-Smirnov 0.0937 0.8615
## Cramer-von Mises 3.4035 0.0000
## Anderson-Darling 0.5069 0.1888
## -----------------------------------------------
# shapiro method
shapiro.test(model_tv$residuals)
##
## Shapiro-Wilk normality test
##
## data: model_tv$residuals
## W = 0.95643, p-value = 0.1451
Kesimpulan: Normal
vif(model_tv)
## CTRP P
## 1.055066 1.055066
model_corr_matrix <- cor(data_tv %>%
select(R, CTRP, P),
use = "pairwise.complete.obs")
model_corr_matrix
## R CTRP P
## R 1.0000000 0.5640285 0.5689881
## CTRP 0.5640285 1.0000000 -0.2284556
## P 0.5689881 -0.2284556 1.0000000
corrplot::corrplot(model_corr_matrix)
# using Durbin Watson Test
durbinWatsonTest(model_tv)
## lag Autocorrelation D-W Statistic p-value
## 1 0.2521297 1.212171 0.006
## Alternative hypothesis: rho != 0
lmtest::dwtest(R ~ CTRP + P, data = data_tv)
##
## Durbin-Watson test
##
## data: R ~ CTRP + P
## DW = 1.2122, p-value = 0.005434
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(R~CTRP+P)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: R ~ CTRP + P
## LM test = 3.7217, df = 1, p-value = 0.05371
par(mfrow=c(2,2))
acf(model_tv$residuals)
pacf(model_tv$residuals)
model_co <- cochrane.orcutt(model_tv)
model_co
## Cochrane-orcutt estimation for first order autocorrelation
##
## Call:
## lm(formula = R ~ CTRP + P)
##
## number of interaction: 7
## rho 0.380978
##
## Durbin-Watson statistic
## (original): 1.21217 , p-value: 5.434e-03
## (transformed): 1.75768 , p-value: 2.608e-01
##
## coefficients:
## (Intercept) CTRP P
## 7777.801248 6030.550491 3.309282
optimum rho = 0.380978
rho <- model_co$rho
rho
## [1] 0.380978
nrow(data_tv)
## [1] 38
data_new <- data.frame(Rtrans = c(1:nrow(data_tv)-1),
Ptrans = c(1:nrow(data_tv)-1),
CTRPtrans = c(1:nrow(data_tv)-1))
for(i in 1:nrow(data_new)){
data_new$Rtrans[i]=data_tv$R[i+1]-rho*data_tv$R[i]
data_new$Ptrans[i]=data_tv$P[i+1]-rho*data_tv$P[i]
data_new$CTRPtrans[i]=data_tv$CTRP[i+1]-rho*data_tv$CTRP[i]
data_new
}
data_new
## Rtrans Ptrans CTRPtrans
## 1 597397.9 61882.85 60.32992
## 2 722755.3 57425.90 86.71144
## 3 558859.2 42168.94 67.85384
## 4 907535.8 95826.54 85.42557
## 5 806070.5 59996.77 104.47286
## 6 914039.4 106454.37 90.32939
## 7 386210.8 48167.65 33.23428
## 8 920591.8 129425.90 83.71198
## 9 580247.4 11138.52 86.04459
## 10 864273.3 104398.06 91.09188
## 11 580933.2 82453.73 65.28210
## 12 801837.9 125653.73 69.66362
## 13 726257.6 112795.48 58.18753
## 14 779515.9 64623.96 90.14024
## 15 827547.3 96853.73 94.85384
## 16 826630.8 66167.65 98.13917
## 17 599566.2 111768.29 37.71036
## 18 764210.0 105253.08 67.42611
## 19 875161.6 41310.04 116.37829
## 20 657282.0 97025.90 59.56743
## 21 752192.5 63082.21 79.66362
## 22 679455.1 71311.33 82.37775
## 23 836702.3 107311.33 73.47286
## 24 635217.8 39596.12 81.13970
## 25 478580.5 85368.94 48.23481
## 26 1012944.2 162168.29 87.04513
## 27 510075.5 14051.79 90.75873
## 28 790594.2 79540.46 84.42503
## 29 771329.1 74911.33 76.80601
## 30 595035.5 84339.81 47.85384
## 31 825019.1 51425.25 96.04513
## 32 937963.4 109197.42 94.32992
## 33 758206.2 69767.65 93.75819
## 34 563422.8 59996.77 65.23428
## 35 1012856.9 153254.37 73.52068
## 36 798855.7 102337.87 82.28264
## 37 947628.6 105595.48 68.23481
## 38 NA NA NA
R.trans <- data_new$Rtrans
P.trans <- data_new$Ptrans
CTRP.trans <- data_new$CTRPtrans
model.trans_co <- lm(R.trans~CTRP.trans+P.trans)
summary(model.trans_co)
##
## Call:
## lm(formula = R.trans ~ CTRP.trans + P.trans)
##
## Residuals:
## Min 1Q Median 3Q Max
## -99627 -35358 4984 22532 181875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.815e+03 4.987e+04 0.097 0.924
## CTRP.trans 6.031e+03 5.137e+02 11.740 1.65e-13 ***
## P.trans 3.309e+00 2.736e-01 12.093 7.26e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 55340 on 34 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.8773, Adjusted R-squared: 0.8701
## F-statistic: 121.5 on 2 and 34 DF, p-value: 3.247e-16
dwtest(model.trans_co)
##
## Durbin-Watson test
##
## data: model.trans_co
## DW = 1.7577, p-value = 0.2608
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(R.trans~CTRP.trans+P.trans, order=1)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: R.trans ~ CTRP.trans + P.trans
## LM test = 0.093627, df = 1, p-value = 0.7596
durbinWatsonTest(model.trans_co)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.04045917 1.757681 0.514
## Alternative hypothesis: rho != 0
https://rstudio-pubs-static.s3.amazonaws.com/866921_78f732ca0f884e6ca5c4d94cfb4e2ae0.html
# for CTRP
testCTRP1 <- grubbs.test(data_tv$CTRP)
testCTRP1
##
## Grubbs test for one outlier
##
## data: data_tv$CTRP
## G = 2.12673, U = 0.87445, p-value = 0.5522
## alternative hypothesis: lowest value 90 is an outlier
testCTRP2 <- grubbs.test(data_tv$CTRP, opposite = T)
testCTRP2
##
## Grubbs test for one outlier
##
## data: data_tv$CTRP
## G = 1.78945, U = 0.91112, p-value = 1
## alternative hypothesis: highest value 156 is an outlier
# for P
testP1 <- grubbs.test(data_tv$P)
testP1
##
## Grubbs test for one outlier
##
## data: data_tv$P
## G = 2.40296, U = 0.83972, p-value = 0.2423
## alternative hypothesis: highest value 208800 is an outlier
testP2 <- grubbs.test(data_tv$P, opposite = T)
testP2
##
## Grubbs test for one outlier
##
## data: data_tv$P
## G = 1.75271, U = 0.91473, p-value = 1
## alternative hypothesis: lowest value 75600 is an outlier
# for R
testR1 <- grubbs.test(data_tv$R)
testR1
##
## Grubbs test for one outlier
##
## data: data_tv$R
## G = 2.16796, U = 0.86954, p-value = 0.4917
## alternative hypothesis: lowest value 904776 is an outlier
testR2 <- grubbs.test(data_tv$R, opposite = T)
testR2
##
## Grubbs test for one outlier
##
## data: data_tv$R
## G = 1.87974, U = 0.90192, p-value = 1
## alternative hypothesis: highest value 1457400 is an outlier
summary(lm(CTRP~P))
##
## Call:
## lm(formula = CTRP ~ P)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.131 -6.563 0.545 7.558 26.869
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.417e+02 1.156e+01 12.253 2.09e-14 ***
## P -1.201e-04 8.532e-05 -1.408 0.168
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.63 on 36 degrees of freedom
## Multiple R-squared: 0.05219, Adjusted R-squared: 0.02586
## F-statistic: 1.982 on 1 and 36 DF, p-value: 0.1677
summary(lm(P~CTRP))
##
## Call:
## lm(formula = P ~ CTRP)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56421 -20700 -2762 24036 76221
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 186456.7 39172.1 4.760 3.12e-05 ***
## CTRP -434.5 308.6 -1.408 0.168
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31640 on 36 degrees of freedom
## Multiple R-squared: 0.05219, Adjusted R-squared: 0.02586
## F-statistic: 1.982 on 1 and 36 DF, p-value: 0.1677
resp <- model_tv$residuals^2
CTRP2 <- CTRP^2
P2 <- P^2
Comb <- CTRP*P
summary(lm(resp~CTRP+P+CTRP2+P2+Comb))
##
## Call:
## lm(formula = resp ~ CTRP + P + CTRP2 + P2 + Comb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.871e+09 -2.825e+09 -1.291e+09 1.374e+07 2.756e+10
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.919e+10 7.496e+10 -0.790 0.436
## CTRP 8.409e+08 8.949e+08 0.940 0.354
## P 1.656e+05 4.581e+05 0.361 0.720
## CTRP2 -2.894e+06 3.148e+06 -0.920 0.365
## P2 -1.966e-02 9.336e-01 -0.021 0.983
## Comb -1.118e+03 2.753e+03 -0.406 0.687
##
## Residual standard error: 6.242e+09 on 32 degrees of freedom
## Multiple R-squared: 0.05738, Adjusted R-squared: -0.0899
## F-statistic: 0.3896 on 5 and 32 DF, p-value: 0.8522
# durbin watson autocorrelation
durbinWatsonTest(model_tv)
## lag Autocorrelation D-W Statistic p-value
## 1 0.2521297 1.212171 0.018
## Alternative hypothesis: rho != 0
x <- c(2,7,5,8,4)
y <- c(3,45,22,58,18)
plot(x,y)
a <- c(2,7,5,8,4)
b <- c(3,18,12,17,11)
plot(a,b)
summary(lm(b~a+a))
##
## Call:
## lm(formula = b ~ a + a)
##
## Residuals:
## 1 2 3 4 5
## -1.5088 1.4737 0.2807 -1.9298 1.6842
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2982 2.2633 -0.132 0.90350
## a 2.4035 0.4026 5.970 0.00941 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.922 on 3 degrees of freedom
## Multiple R-squared: 0.9224, Adjusted R-squared: 0.8965
## F-statistic: 35.64 on 1 and 3 DF, p-value: 0.009406
1-pt(5.04/4.3, 47) 1.91/0.35 5.04/4.3