alpha1 <- 0
beta1 <- 1
alpha2 <- 3
beta2 <- beta1
Model1 ima koeficiente \[y_1=0+1 * x\]
Model 2 pa \[y_2=3+1 * x\] Empirični koeficienti so rahlo drugačni
n <- 10
x <- 1:n
eps <- scale(rnorm(n))
y1 <- alpha1 + beta1*x+eps
(fit1 <- lm(y1~x))
##
## Call:
## lm(formula = y1 ~ x)
##
## Coefficients:
## (Intercept) x
## 0.4197 0.9237
y2 <- alpha2 + beta2*x+eps
(fit2 <- lm(y2~x))
##
## Call:
## lm(formula = y2 ~ x)
##
## Coefficients:
## (Intercept) x
## 3.4197 0.9237
Slike
Kljub različnim koeficientom je korelacija enaka.
par(mfrow=c(1,2))
plot(x,y1,ylim=c(0,15))
abline(fit1)
segments(x,predict(fit1),x,y1,col="blue")
title(round(cor(x,y1),4))
plot(x,y2,ylim=c(0,15))
abline(fit2)
segments(x,predict(fit2),x,y2,col="blue")
title(round(cor(x,y2),4))
Podobno je, če so napake pripisane drugim enotam:
eps3 <- sample(eps,replace = FALSE)
y3 <- alpha1 + beta1*x+eps3
(fit3 <- lm(y3~x))
##
## Call:
## lm(formula = y3 ~ x)
##
## Coefficients:
## (Intercept) x
## 0.9821 0.8214
par(mfrow=c(1,2))
plot(x,y1,ylim=c(0,15))
abline(fit1)
segments(x,predict(fit1),x,y1,col="blue")
title(round(cor(x,y1),4))
plot(x,y3,ylim=c(0,15))
abline(fit3)
segments(x,predict(fit3),x,y3,col="blue")
title(round(cor(x,y3),4))
par(mfrow=c(1,3))
# priprava odatkov
x <- rep.int(1:5,times=3:7)
y <- rep.int(5:1,times=3:7)
n <- rep.int(3:7,times=3:7)
n
## [1] 3 3 3 4 4 4 4 5 5 5 5 5 6 6 6 6 6 6 7 7 7 7 7 7 7
tbl <- table(x,y)
tbl <- as.data.frame(tbl)
tbl <- tbl[tbl[,3]>0,]
tbl$x <- as.numeric(tbl$x)
tbl$y <- as.numeric(tbl$y)
tbl
## x y Freq
## 5 5 1 7
## 9 4 2 6
## 13 3 3 5
## 17 2 4 4
## 21 1 5 3
# risanje
plot(x,y,col="red",cex=2,pch=16)
# jitter, da se vidijo posamezne pike
plot(jitter(x),jitter(y),pch=21,bg="red",cex=2)
# sqrt, da so ploščine pik sorazmerne frekvenci
# faktor (2*), da pike niso premajhne/prevelike
plot(tbl$x,tbl$y, pch=21, bg="red", cex= 2* sqrt(tbl$Freq))
n <- 20
delta <-10
x1 <- 5+rnorm(n)
y1 <- 10*x1+rnorm(n,0,5)
x2 <- x1+delta
y2 <- 100-5*x1+delta+rnorm(n,0,5)
x <- c(x1,x2)
y <- c(y1,y2)
plot(x,y,pch=16,col=rep(c(2,4),each=n))
abline(lm(y~x))
text(mean(x),mean(y)+5,paste("r=",round(cor(y,x),4)))
# posamezne korelacije
abline(lm(y1~x1),col=2)
text(mean(x1),mean(y1)+30,paste("r=",round(cor(y1,x1),4)),col=2)
abline(lm(y2~x2),col=4)
text(mean(x2),mean(y2)-20,paste("r=",round(cor(y2,x2),4)),col=4)