library(ggplot2)
library(MASS)
set.seed(92)
datano1a <- rnorm(1000)
head(datano1a)
## [1] -1.4787834 -0.4975004 0.9303314 -1.1882254 -1.7453041 -0.9815744
set.seed(92)
datano1b <- c(rnorm(500), rchisq(500, 1))
datano1c <- c(rchisq(500, 1), rchisq(500, 2))
datano1d <- c(rchisq(250, 1), rchisq(250, 2), rnorm(250), rnorm(250, mean=1))
datano1 <- data.frame("simetris"=datano1a, "normchisq"=datano1b, "chisq2"=datano1c, "2chisq2norm"=datano1d)
head(datano1)
## simetris normchisq chisq2 X2chisq2norm
## 1 -1.4787834 -1.4787834 1.6869314 1.10722600
## 2 -0.4975004 -0.4975004 1.1862246 0.80808507
## 3 0.9303314 0.9303314 0.4067606 1.57722986
## 4 -1.1882254 -1.1882254 2.2663602 0.58901896
## 5 -1.7453041 -1.7453041 0.4738179 0.04948552
## 6 -0.9815744 -0.9815744 3.9712822 1.86897423
par(mfrow=c(2,2))
hist(datano1$simetris, main="Data Simetris", xlab="Normal~(0,1)")
hist(datano1$normchisq, main="Data Campuran B", xlab="Normal~(0,1) & Chi-Square(1)")
hist(datano1$chisq2, main="Data Campuran C", xlab="Chi-Square(1) & Chi-Square(2)")
hist(datano1$X2chisq2norm, main="Data Campuran D", xlab="Chi-Square(1), Chi-Square(2), Normal~(0,1), & Normal~(1,1)")
n1 <- c(4, 12, 20, 60, 100)
dist.rate.df <- data.frame()
for(i in n1){
set.seed(92)
cont <- datano1[sample(x=nrow(datano1), size=i),]
par(mfrow=c(2,2))
hist(cont$simetris, main=paste("Sample ", i, "Data Simetris", sep=""))
hist(cont$normchisq, main=paste("Sample ", i, "Data Campuran B", sep=""))
hist(cont$chisq2, main=paste("Sample ", i, "Data Campuran C", sep=""))
hist(cont$X2chisq2norm, main=paste("Sample ", i, "Data Campuran D", sep=""))
rowdf1 <- c(mean(cont$simetris), mean(cont$normchisq), mean(cont$chisq2), mean(cont$X2chisq2norm), sd(cont$simetris), sd(cont$normchisq), sd(cont$chisq2), sd(cont$X2chisq2norm))
dist.rate.df <- rbind(dist.rate.df, rowdf1, deparse.level=0)
}
colnames(dist.rate.df) <- c("Mean_SS", "Mean_SBA", "Mean_SBB", "Mean_SBC", "SD_SS", "SD_SBA", "SD_SBB", "SD_SBC")
dist.rate.df <- cbind("S.Size"=n1, dist.rate.df)
knitr::kable(dist.rate.df)
| S.Size | Mean_SS | Mean_SBA | Mean_SBB | Mean_SBC | SD_SS | SD_SBA | SD_SBB | SD_SBC |
|---|---|---|---|---|---|---|---|---|
| 4 | -0.9513174 | -1.1923099 | 1.841388 | 0.6670522 | 1.6921728 | 1.358704 | 3.309261 | 0.5202642 |
| 12 | -0.5314871 | -0.0646874 | 1.502429 | 0.5616156 | 1.1666300 | 1.658077 | 2.194854 | 0.8140256 |
| 20 | -0.4152594 | -0.0373615 | 1.851384 | 0.6263164 | 1.0278423 | 1.358010 | 2.173710 | 0.7833835 |
| 60 | -0.1674908 | 0.2759806 | 1.634122 | 1.1343503 | 1.0043878 | 1.189810 | 1.915949 | 1.6898345 |
| 100 | -0.1388678 | 0.3977934 | 1.590076 | 1.1582678 | 0.9548323 | 1.219918 | 1.865786 | 1.7463544 |
Kriteria yang dibutuhkan = Sebaran Normal dengan mean=0 dan SD=1
plot(dist.rate.df$S.Size, dist.rate.df$Mean_SS, col="red", type="o", pch=19, ylim=c(-2,2), xlab="Banyak Sampel", ylab="Mean")
points(dist.rate.df$S.Size, dist.rate.df$Mean_SBA, col="orange", pch=19)
lines(dist.rate.df$S.Size, dist.rate.df$Mean_SBA, col="orange")
points(dist.rate.df$S.Size, dist.rate.df$Mean_SBB, col="darkgreen", pch=19)
lines(dist.rate.df$S.Size, dist.rate.df$Mean_SBB, col="darkgreen")
points(dist.rate.df$S.Size, dist.rate.df$Mean_SBC, col="darkblue", pch=19)
lines(dist.rate.df$S.Size, dist.rate.df$Mean_SBC, col="darkblue")
abline(h=0)
legend("bottomright", legend=c("Simetris", "Campuran B", "Campuran C", "Campuran D"), pch=rep(19,4), col=c("red", "orange", "darkgreen", "darkblue"), title="Legend")
Interpretasi lanjut besok/Yg kepikiran masukin ppt
plot(dist.rate.df$S.Size, dist.rate.df$SD_SS, col="red", type="o", pch="o", ylim=c(0,4), xlab="Banyak Sampel", ylab="SD")
points(dist.rate.df$S.Size, dist.rate.df$SD_SBA, col="orange", pch="o")
lines(dist.rate.df$S.Size, dist.rate.df$SD_SBA, col="orange")
points(dist.rate.df$S.Size, dist.rate.df$SD_SBB, col="darkgreen", pch="o")
lines(dist.rate.df$S.Size, dist.rate.df$SD_SBB, col="darkgreen")
points(dist.rate.df$S.Size, dist.rate.df$SD_SBC, col="darkblue", pch="o")
lines(dist.rate.df$S.Size, dist.rate.df$SD_SBC, col="darkblue")
abline(h=1)
legend("topright", legend=c("Simetris", "Campuran B", "Campuran C", "Campuran D"), pch=rep("o",4), col=c("red", "orange", "darkgreen", "darkblue"), title="Legend")
Interpretasi lanjut besok/Yg kepikiran masukin ppt
mean21 <- c(10, 12)
mean22 <- 25
sd21 <- c(2, 3)
sd22 <- 2
corr <- 0.85
n2 <- c(seq(1, 10, 1))
sigma <- matrix(c(sd21[1]^2, sd21[1]*sd21[2]*corr, sd21[2]*sd21[1]*corr, sd21[2]^2), nrow=2, byrow=T)
sigma
## [,1] [,2]
## [1,] 4.0 5.1
## [2,] 5.1 9.0
set.seed(92)
datano2 <- as.data.frame(mvrnorm(n=1000, mu=mean21, Sigma=sigma))
colnames(datano2) <- c("m10", "m12")
head(datano2)
## m10 m12
## 1 7.211833 7.656556
## 2 9.043640 10.550196
## 3 11.069818 15.159181
## 4 10.203265 6.986419
## 5 5.836438 7.417971
## 6 8.933136 8.628229
set.seed(92)
sigma2 <- matrix(c(sd22^2, (sd22^2)*corr, (sd22^2)*corr, sd22^2), nrow=2, byrow=T)
sigma2
## [,1] [,2]
## [1,] 4.0 3.4
## [2,] 3.4 4.0
datano2n <- datano2[sample(x=nrow(datano2), size=800),]
pencd <- as.data.frame(mvrnorm(n=200, mu=rep(mean22, 2), Sigma=sigma2))
colnames(pencd) <- c("m10", "m12")
datano2n <- rbind(datano2n, pencd)
head(datano2n)
## m10 m12
## 466 5.230232 5.621267
## 194 5.658041 4.592735
## 823 11.939364 14.875867
## 266 9.675681 11.934186
## 748 9.770855 12.289778
## 380 7.736224 9.075983
cor.df <- data.frame("Obs"=c("Cor.Expected", "Data1000", "With_Penc200", "Samp10", "Samp20", "Samp30", "Samp40", "Samp50", "Samp60", "Samp70", "Samp80", "Samp90", "Samp100"), "Cor.Pearson"=c(0.85, cor(datano2$m10, datano2$m12, method="pearson"), cor(datano2n$m10, datano2n$m12, method="pearson"), rep(NA, 10)), "Cor.Spearman"=c(0.85, cor(datano2$m10, datano2$m12, method="spearman"), cor(datano2n$m10, datano2n$m12, method="spearman"), rep(NA, 10)))
for(i in n2){
set.seed(92)
samp <- datano2[sample(x=nrow(datano2), size=i*10),]
penc <- as.data.frame(mvrnorm(n=2*i, mu=rep(mean22, 2), Sigma=sigma2))
colnames(penc) <- c("m10", "m12")
samp2 <- samp[sample(x=nrow(samp), size=8*i),]
samp2 <- rbind(samp2, penc)
cor.df[i+3, 2] <- cor(samp2$m10, samp2$m12, method="pearson")
cor.df[i+3, 3] <- cor(samp2$m10, samp2$m12, method="spearman")
}
cor.df
## Obs Cor.Pearson Cor.Spearman
## 1 Cor.Expected 0.8500000 0.8500000
## 2 Data1000 0.8486674 0.8403804
## 3 With_Penc200 0.9599604 0.9190507
## 4 Samp10 0.9793070 0.9878788
## 5 Samp20 0.9788140 0.9233083
## 6 Samp30 0.9827399 0.9092325
## 7 Samp40 0.9773067 0.9283302
## 8 Samp50 0.9747199 0.8741897
## 9 Samp60 0.9758139 0.8928591
## 10 Samp70 0.9754859 0.9018809
## 11 Samp80 0.9658448 0.9095171
## 12 Samp90 0.9738065 0.9128452
## 13 Samp100 0.9730007 0.9082148
Berdasarkan hasil pembangkitan dan sampling data, didapatkan bahwa metode korelasi spearman memberikan hasil yang lebih sesuai dibandingkan dengan metode korelasi pearson.