콜레스키 분해를 이용한 공분산행렬 기반의 난수생성
r.cov<-function(mu.e, mu.x1, mu.x2, cov.ee, cov.e.x1, cov.e.x2, cov.x1.x1, cov.x1.x2, cov.x2.x2, n)
{
mu<-matrix(c(mu.e, mu.x1, mu.x2), ncol=1)
cov<-matrix(c(cov.ee, cov.e.x1, cov.e.x2, cov.e.x1, cov.x1.x1, cov.x1.x2, cov.e.x2, cov.x1.x2, cov.x2.x2), ncol=3)
print("평균벡터")
print(mu)
print("공분산행렬")
print(cov)
print("공분산행렬의 콜레스키 분해")
l <- chol(cov)
print(l)
temp <- t(l)%*%matrix(rnorm(dim(cov)[1]*n), ncol=n)
y<-as.data.frame(t(temp))
names(y) <- c("error", "x1", "x2")
y$error<-y$error + mu.e
y$x1<-y$x1 + mu.x1
y$x2<-y$x2 + mu.x2
return(y)
}
salary<-r.cov(0, 60, 30, 2, 0, 0.1, 2, -0.8, 1, 1000)
## [1] "평균벡터"
## [,1]
## [1,] 0
## [2,] 60
## [3,] 30
## [1] "공분산행렬"
## [,1] [,2] [,3]
## [1,] 2.0 0.0 0.1
## [2,] 0.0 2.0 -0.8
## [3,] 0.1 -0.8 1.0
## [1] "공분산행렬의 콜레스키 분해"
## [,1] [,2] [,3]
## [1,] 1.414214 0.000000 0.07071068
## [2,] 0.000000 1.414214 -0.56568542
## [3,] 0.000000 0.000000 0.82158384
cov(salary)
## error x1 x2
## error 1.92652757 0.06743591 0.1167598
## x1 0.06743591 2.04033120 -0.7970544
## x2 0.11675983 -0.79705436 0.9412816
cor(salary)
## error x1 x2
## error 1.00000000 0.03401365 0.08670538
## x1 0.03401365 1.00000000 -0.57514563
## x2 0.08670538 -0.57514563 1.00000000
salary$y<-with(salary, 1 + 0.2*x1 + 0.3*x2 + error)
library(ggplot2)
ggplot(salary, aes(x=x1, y=x2)) + geom_point() + stat_smooth(method=lm, se=FALSE)
콜레스키 분해를 이용한 공분산행렬 기반의 난수생성
r.cov<-function(mu.x1, mu.x2, cov.x1.x1, cov.x2.x2, cov.x1.x2, n)
{
mu<-matrix(c(mu.x1, mu.x2), ncol=1)
cov<-matrix(c(cov.x1.x1, cov.x1.x2, cov.x1.x2, cov.x2.x2), ncol=2)
print("평균 벡터")
print(mu)
print("공분산 행렬")
print(cov)
print("공분산행렬의 콜레스키 분해")
l <- chol(cov)
print(l)
temp <- t(l)%*%matrix(rnorm(dim(cov)[1]*n), ncol=n)
y<-as.data.frame(t(temp))
names(y) <- c("x1", "x2")
y$x1<-y$x1 + mu.x1
y$x2<-y$x2 + mu.x2
return(y)
}
#상관관계가 음수일때
cor1<-r.cov(0, 0, 1, 1, -0.1, 100)
## [1] "평균 벡터"
## [,1]
## [1,] 0
## [2,] 0
## [1] "공분산 행렬"
## [,1] [,2]
## [1,] 1.0 -0.1
## [2,] -0.1 1.0
## [1] "공분산행렬의 콜레스키 분해"
## [,1] [,2]
## [1,] 1 -0.1000000
## [2,] 0 0.9949874
cor2<-r.cov(0, 0, 1, 1, -0.3, 100)
## [1] "평균 벡터"
## [,1]
## [1,] 0
## [2,] 0
## [1] "공분산 행렬"
## [,1] [,2]
## [1,] 1.0 -0.3
## [2,] -0.3 1.0
## [1] "공분산행렬의 콜레스키 분해"
## [,1] [,2]
## [1,] 1 -0.3000000
## [2,] 0 0.9539392
cor3<-r.cov(0, 0, 1, 1, -0.5, 100)
## [1] "평균 벡터"
## [,1]
## [1,] 0
## [2,] 0
## [1] "공분산 행렬"
## [,1] [,2]
## [1,] 1.0 -0.5
## [2,] -0.5 1.0
## [1] "공분산행렬의 콜레스키 분해"
## [,1] [,2]
## [1,] 1 -0.5000000
## [2,] 0 0.8660254
cor4<-r.cov(0, 0, 1, 1, -0.9, 100)
## [1] "평균 벡터"
## [,1]
## [1,] 0
## [2,] 0
## [1] "공분산 행렬"
## [,1] [,2]
## [1,] 1.0 -0.9
## [2,] -0.9 1.0
## [1] "공분산행렬의 콜레스키 분해"
## [,1] [,2]
## [1,] 1 -0.9000000
## [2,] 0 0.4358899
par(mfrow=c(2,2))
lm1<-lm(x2~x1, data=cor1)
plot(cor1$x1, cor1$x2, xlim=c(-3, 3), ylim=c(-3, 3), main="cor=-0.1")
abline(lm1)
lm2<-lm(x2~x1, data=cor2)
plot(cor2$x1, cor2$x2, xlim=c(-3, 3), ylim=c(-3, 3), main="cor=-0.3")
abline(lm2)
lm3<-lm(x2~x1, data=cor3)
plot(cor3$x1, cor3$x2, xlim=c(-3, 3), ylim=c(-3, 3), main="cor=-0.5")
abline(lm3)
lm4<-lm(x2~x1, data=cor4)
plot(cor4$x1, cor4$x2, xlim=c(-3, 3), ylim=c(-3, 3), main="cor=-0.9")
abline(lm4)
#상관관계가 양수일때
cor1<-r.cov(0, 0, 1, 1, 0.1, 100)
## [1] "평균 벡터"
## [,1]
## [1,] 0
## [2,] 0
## [1] "공분산 행렬"
## [,1] [,2]
## [1,] 1.0 0.1
## [2,] 0.1 1.0
## [1] "공분산행렬의 콜레스키 분해"
## [,1] [,2]
## [1,] 1 0.1000000
## [2,] 0 0.9949874
cor2<-r.cov(0, 0, 1, 1, 0.3, 100)
## [1] "평균 벡터"
## [,1]
## [1,] 0
## [2,] 0
## [1] "공분산 행렬"
## [,1] [,2]
## [1,] 1.0 0.3
## [2,] 0.3 1.0
## [1] "공분산행렬의 콜레스키 분해"
## [,1] [,2]
## [1,] 1 0.3000000
## [2,] 0 0.9539392
cor3<-r.cov(0, 0, 1, 1, 0.5, 100)
## [1] "평균 벡터"
## [,1]
## [1,] 0
## [2,] 0
## [1] "공분산 행렬"
## [,1] [,2]
## [1,] 1.0 0.5
## [2,] 0.5 1.0
## [1] "공분산행렬의 콜레스키 분해"
## [,1] [,2]
## [1,] 1 0.5000000
## [2,] 0 0.8660254
cor4<-r.cov(0, 0, 1, 1, 0.9, 100)
## [1] "평균 벡터"
## [,1]
## [1,] 0
## [2,] 0
## [1] "공분산 행렬"
## [,1] [,2]
## [1,] 1.0 0.9
## [2,] 0.9 1.0
## [1] "공분산행렬의 콜레스키 분해"
## [,1] [,2]
## [1,] 1 0.9000000
## [2,] 0 0.4358899
par(mfrow=c(2,2))
lm1<-lm(x2~x1, data=cor1)
plot(cor1$x1, cor1$x2, xlim=c(-3, 3), ylim=c(-3, 3), main="cor=-0.1")
abline(lm1)
lm2<-lm(x2~x1, data=cor2)
plot(cor2$x1, cor2$x2, xlim=c(-3, 3), ylim=c(-3, 3), main="cor=-0.3")
abline(lm2)
lm3<-lm(x2~x1, data=cor3)
plot(cor3$x1, cor3$x2, xlim=c(-3, 3), ylim=c(-3, 3), main="cor=-0.5")
abline(lm3)
lm4<-lm(x2~x1, data=cor4)
plot(cor4$x1, cor4$x2, xlim=c(-3, 3), ylim=c(-3, 3), main="cor=-0.9")
abline(lm4)
set.seed(1092)
i<-seq(1, 20)
x<-rnorm(20, mean=20, sd=10)
y<-2*x + rnorm(20, mean=20, sd=10)
y[20]<-400
sim<-data.frame(i, x, y)
cor(x,y)
## [1] 0.4356212
cor(x[i<20], y[i<20])
## [1] 0.9423874
library(ggplot2)
# 원본대로 그리기
ggplot(sim, aes(x=x, y=y)) + geom_point() + ylim(c(-10,400)) +
stat_smooth(method=lm, se=FALSE)
X11() #창 하나 더 띄우기
ggplot(sim[i<20,], aes(x=x, y=y)) + geom_point() + ylim(c(-10,400)) +
stat_smooth(method=lm, se=FALSE)
# 두 개 같이 그려 비교하기
x1<-append(x, x)
y1<-append(y, y)
y1[40]<-NA
type<-c(rep("cor1", each=20, times=1), rep("cor2", each=20, times=1))
sim2<-data.frame(x1, y1, type)
ggplot(sim, aes(x=x1, y=y1, color=type)) + geom_point() + ylim(c(-10,400)) +
stat_smooth(method=lm, se=FALSE)
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
coef<-c(rep(2, each=20, times=1), rep(10, each=20, times=1))
x<-rnorm(40, mean=20, sd=10)
y<-coef*x + rnorm(40, mean=0, sd=0)
sim<-data.frame(coef, x, y)
sim$coef.type<-with(sim, as.factor(coef))
#같이 그리기
ggplot(sim, aes(x=x, y=y, color=coef.type)) + geom_point() +
stat_smooth(method=lm, se=FALSE)
#따로 그리기
ggplot(sim, aes(x=x, y=y, color=coef.type)) + geom_point() +
stat_smooth(method=lm, se=FALSE) + facet_grid( ~ coef)
x<-seq(1, 20)
y<-x^2 -20*x + rnorm(20, mean=0, sd=1)
sim<-data.frame(x, y)
ggplot(sim, aes(x=x, y=y)) + geom_point() +
stat_smooth(method=lm, se=FALSE)
install.packages(“corrplot”) install.packages(“psych”) install.packages(“ggm”)
id<-c(1, 2, 3, 4, 5)
q1<-c(6, 4, 4, 3, 1)
q2<-c(6, 6, 4, 1, 2)
q3<-c(5, 5, 4, 4, 1)
q4<-c(4, 3, 2, 2, 1)
cron<-data.frame(id, q1, q2, q3, q4)
summary(cron)
## id q1 q2 q3 q4
## Min. :1 Min. :1.0 Min. :1.0 Min. :1.0 Min. :1.0
## 1st Qu.:2 1st Qu.:3.0 1st Qu.:2.0 1st Qu.:4.0 1st Qu.:2.0
## Median :3 Median :4.0 Median :4.0 Median :4.0 Median :2.0
## Mean :3 Mean :3.6 Mean :3.8 Mean :3.8 Mean :2.4
## 3rd Qu.:4 3rd Qu.:4.0 3rd Qu.:6.0 3rd Qu.:5.0 3rd Qu.:3.0
## Max. :5 Max. :6.0 Max. :6.0 Max. :5.0 Max. :4.0
cor(cron[,2:5])
## q1 q2 q3 q4
## q1 1.0000000 0.7604172 0.8877834 0.9414689
## q2 0.7604172 1.0000000 0.6538566 0.8076923
## q3 0.8877834 0.6538566 1.0000000 0.8540168
## q4 0.9414689 0.8076923 0.8540168 1.0000000
plot(cron[,2:5])
pairs(cron[,2:5])
#다양한 상관관계 plot
#함수 정의
panel.cor <- function(x,y,digits=2,prefix="",cex.cor, ...)
{
usr <-par("usr")
on.exit(par(usr))
par(usr =c(0, 1, 0, 1))
r <-abs(cor(x,y,use="complete.obs"))
txt <-format(c(r, 0.123456789),digits=digits)[1]
txt <-paste(prefix,txt,sep="")
if(missing(cex.cor))cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5,txt,cex =cex.cor * (1 +r) / 2)
}
panel.hist <- function(x, ...)
{
usr <-par("usr")
on.exit(par(usr))
par(usr =c(usr[1:2], 0, 1.5) )
h <-hist(x,plot = FALSE)
breaks <-h$breaks
nB <-length(breaks)
y <-h$counts
y <-y/max(y)
rect(breaks[-nB], 0,breaks[-1],y,col="white", ...)
}
panel.lm <- function (x,y,col =par("col"),bg = NA,pch =par("pch"),
cex = 1,col.smooth = "black", ...)
{
points(x,y,pch =pch,col =col,bg =bg,cex =cex)
abline(stats::lm(y ~ x),col =col.smooth, ...)
}
#다양한 matrix 그림
pairs(cron[,2:5],upper.panel =panel.cor,
diag.panel =panel.hist, lower.panel =panel.lm)
#상관관계 행렬을 나타내는 다양한 그림
library(corrplot)
corrplot(cor(cron[,2:5]))
corrplot(cor(cron[,2:5]) ,method="shade",shade.col=NA,tl.col="black",tl.srt=45)
corrplot(cor(cron[,2:5]),method="shade",shade.col=NA,tl.col="black",tl.srt=45,
addCoef.col="black",addcolorlabel="no",order="AOE")
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt
## = tl.srt, : "addcolorlabel"는 그래픽 매개변수가 아닙니다
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col
## = tl.col, : "addcolorlabel"는 그래픽 매개변수가 아닙니다
## Warning in title(title, ...): "addcolorlabel"는 그래픽 매개변수가 아닙니다
# 귀무가설 검정(rho=0)
library(psych)
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:ggplot2':
##
## %+%
?corr.test
## starting httpd help server ... done
tt<-corr.test(cron[,2:5])
print(tt, short=FALSE)
## Call:corr.test(x = cron[, 2:5])
## Correlation matrix
## q1 q2 q3 q4
## q1 1.00 0.76 0.89 0.94
## q2 0.76 1.00 0.65 0.81
## q3 0.89 0.65 1.00 0.85
## q4 0.94 0.81 0.85 1.00
## Sample Size
## [1] 5
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## q1 q2 q3 q4
## q1 0.00 0.29 0.22 0.10
## q2 0.14 0.00 0.29 0.29
## q3 0.04 0.23 0.00 0.26
## q4 0.02 0.10 0.07 0.00
##
## To see confidence intervals of the correlations, print with the short=FALSE option
##
## Confidence intervals based upon normal theory. To get bootstrapped values, try cor.ci
## lower r upper p
## q1-q2 -0.37 0.76 0.98 0.14
## q1-q3 0.03 0.89 0.99 0.04
## q1-q4 0.35 0.94 1.00 0.02
## q2-q3 -0.54 0.65 0.97 0.23
## q2-q4 -0.26 0.81 0.99 0.10
## q3-q4 -0.11 0.85 0.99 0.07
library(ggm)
## Loading required package: igraph
##
## Attaching package: 'igraph'
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
##
##
## Attaching package: 'ggm'
##
## The following object is masked from 'package:igraph':
##
## pa
pcor(c("q1", "q3", "q2", "q4" ), var(cron[,2:5]))
## [1] 0.4808766
#앞의 둘은 편상관계수 구할 넘, 나머지 뒤의 벡터는 통제
#test statistic for rho=0
p.coef<-pcor(c("q1", "q3", "q2", "q4" ), var(cron[,2:5]))
pcor.test(p.coef, 2, 5)
## $tval
## [1] 0.5484524
##
## $df
## [1] 1
##
## $pvalue
## [1] 0.680637
## code 10.8
cov(cron[,2:5])
## q1 q2 q3 q4
## q1 3.30 3.15 2.65 1.95
## q2 3.15 5.20 2.45 2.10
## q3 2.65 2.45 2.70 1.60
## q4 1.95 2.10 1.60 1.30
library(psych)
alpha(cron[,2:5])
##
## Reliability analysis
## Call: alpha(x = cron[, 2:5])
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.92 0.95 0.95 0.82 18 0.24 3.4 1.6
##
## lower alpha upper 95% confidence boundaries
## 0.45 0.92 1.39
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## q1 0.86 0.91 0.90 0.77 10.1 0.36
## q2 0.94 0.96 0.95 0.89 25.4 0.30
## q3 0.89 0.94 0.93 0.84 15.4 0.34
## q4 0.89 0.91 0.90 0.77 9.9 0.34
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## q1 5 0.96 0.97 0.97 0.92 3.6 1.8
## q2 5 0.89 0.87 0.80 0.76 3.8 2.3
## q3 5 0.90 0.91 0.88 0.83 3.8 1.6
## q4 5 0.96 0.97 0.97 0.94 2.4 1.1
##
## Non missing response frequency for each item
## 1 2 3 4 5 6 miss
## q1 0.2 0.0 0.2 0.4 0.0 0.2 0
## q2 0.2 0.2 0.0 0.2 0.0 0.4 0
## q3 0.2 0.0 0.0 0.4 0.4 0.0 0
## q4 0.2 0.4 0.2 0.2 0.0 0.0 0
#척도변화에 따른 알파값의 변화
cron2<-cron[,2:5]
cron2$q5<-with(cron, q1*10)
cron
## id q1 q2 q3 q4
## 1 1 6 6 5 4
## 2 2 4 6 5 3
## 3 3 4 4 4 2
## 4 4 3 1 4 2
## 5 5 1 2 1 1
alpha(cron2[,2:5])
##
## Reliability analysis
## Call: alpha(x = cron2[, 2:5])
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.44 0.95 0.95 0.82 18 0.45 12 5.6
##
## lower alpha upper 95% confidence boundaries
## -0.44 0.44 1.32
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## q2 0.33 0.96 0.95 0.89 25.4 0.48
## q3 0.36 0.94 0.93 0.84 15.4 0.49
## q4 0.40 0.91 0.90 0.77 9.9 0.50
## q5 0.86 0.91 0.90 0.77 10.1 0.36
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## q2 5 0.80 0.87 0.80 0.76 3.8 2.3
## q3 5 0.90 0.91 0.88 0.88 3.8 1.6
## q4 5 0.95 0.97 0.97 0.95 2.4 1.1
## q5 5 1.00 0.97 0.97 0.92 36.0 18.2