Kelompok 8 P2

- Aprilia Permata Putri G1401201002

- Reza Arya Sukma G1401201025

- Muhammad Zhillan Zakiyyan G1401201092

Package

library(ggplot2)
library(MASS)

No. 1

I. Membangkitkan data yang menyebar Normal(0,1)

set.seed(92)
datano1a <- rnorm(1000)
head(datano1a)
## [1] -1.4787834 -0.4975004  0.9303314 -1.1882254 -1.7453041 -0.9815744

II. Membangkitkan data campuran dengan rincian:

A. 50% Menyebar Normal(0,1) dan 50% Menyebar Chi-Square(1)

B. 50% Menyebar Chi-Square(1) dan 50% Menyebar Chi-Square(2)

C. 25% Menyebar Chi-Square(1), 25% Menyebar Chi-Square(2), 25% Menyebar Normal(0,1), dan 25% Menyebar Normal(1,1)

set.seed(92)
datano1b <- c(rnorm(500), rchisq(500, 1))
datano1c <- c(rchisq(500, 1), rchisq(500, 2))
datano1d <- c(rchisq(250, 1), rchisq(250, 2), rnorm(250), rnorm(250, mean=1))
datano1 <- data.frame("simetris"=datano1a, "normchisq"=datano1b, "chisq2"=datano1c, "2chisq2norm"=datano1d)
head(datano1)
##     simetris  normchisq    chisq2 X2chisq2norm
## 1 -1.4787834 -1.4787834 1.6869314   1.10722600
## 2 -0.4975004 -0.4975004 1.1862246   0.80808507
## 3  0.9303314  0.9303314 0.4067606   1.57722986
## 4 -1.1882254 -1.1882254 2.2663602   0.58901896
## 5 -1.7453041 -1.7453041 0.4738179   0.04948552
## 6 -0.9815744 -0.9815744 3.9712822   1.86897423

III. Eksplorasi Bentuk Sebaran Data

par(mfrow=c(2,2))
hist(datano1$simetris, main="Data Simetris", xlab="Normal~(0,1)")
hist(datano1$normchisq, main="Data Campuran B", xlab="Normal~(0,1) & Chi-Square(1)")
hist(datano1$chisq2, main="Data Campuran C", xlab="Chi-Square(1) & Chi-Square(2)")
hist(datano1$X2chisq2norm, main="Data Campuran D", xlab="Chi-Square(1), Chi-Square(2), Normal~(0,1), & Normal~(1,1)")

IV. Pengambilan Sampel sesuai jumlah yang dibutuhkan

IVA. Eksplorasi Sampel

n1 <- c(4, 12, 20, 60, 100)
dist.rate.df <- data.frame()
for(i in n1){
  set.seed(92)
  cont <- datano1[sample(x=nrow(datano1), size=i),]
  par(mfrow=c(2,2))
  hist(cont$simetris, main=paste("Sample ", i, "Data Simetris", sep=""))
  hist(cont$normchisq, main=paste("Sample ", i, "Data Campuran B", sep=""))
  hist(cont$chisq2, main=paste("Sample ", i, "Data Campuran C", sep=""))
  hist(cont$X2chisq2norm, main=paste("Sample ", i, "Data Campuran D", sep=""))
  rowdf1 <- c(mean(cont$simetris), mean(cont$normchisq), mean(cont$chisq2), mean(cont$X2chisq2norm), sd(cont$simetris), sd(cont$normchisq), sd(cont$chisq2), sd(cont$X2chisq2norm))
  dist.rate.df <- rbind(dist.rate.df, rowdf1, deparse.level=0)
}

colnames(dist.rate.df) <- c("Mean_SS", "Mean_SBA", "Mean_SBB", "Mean_SBC", "SD_SS", "SD_SBA", "SD_SBB", "SD_SBC")

IVB. Membuat data rataan sampel

dist.rate.df <- cbind("S.Size"=n1, dist.rate.df)
knitr::kable(dist.rate.df)
S.Size Mean_SS Mean_SBA Mean_SBB Mean_SBC SD_SS SD_SBA SD_SBB SD_SBC
4 -0.9513174 -1.1923099 1.841388 0.6670522 1.6921728 1.358704 3.309261 0.5202642
12 -0.5314871 -0.0646874 1.502429 0.5616156 1.1666300 1.658077 2.194854 0.8140256
20 -0.4152594 -0.0373615 1.851384 0.6263164 1.0278423 1.358010 2.173710 0.7833835
60 -0.1674908 0.2759806 1.634122 1.1343503 1.0043878 1.189810 1.915949 1.6898345
100 -0.1388678 0.3977934 1.590076 1.1582678 0.9548323 1.219918 1.865786 1.7463544

V. Melihat perbandingan rataan sampel melalui visualisasi

Kriteria yang dibutuhkan = Sebaran Normal dengan mean=0 dan SD=1

plot(dist.rate.df$S.Size, dist.rate.df$Mean_SS, col="red", type="o", pch=19, ylim=c(-2,2), xlab="Banyak Sampel", ylab="Mean")
points(dist.rate.df$S.Size, dist.rate.df$Mean_SBA, col="orange", pch=19)
lines(dist.rate.df$S.Size, dist.rate.df$Mean_SBA, col="orange")
points(dist.rate.df$S.Size, dist.rate.df$Mean_SBB, col="darkgreen", pch=19)
lines(dist.rate.df$S.Size, dist.rate.df$Mean_SBB, col="darkgreen")
points(dist.rate.df$S.Size, dist.rate.df$Mean_SBC, col="darkblue", pch=19)
lines(dist.rate.df$S.Size, dist.rate.df$Mean_SBC, col="darkblue")
abline(h=0)
legend("bottomright", legend=c("Simetris", "Campuran B", "Campuran C", "Campuran D"), pch=rep(19,4), col=c("red", "orange", "darkgreen", "darkblue"), title="Legend")

Interpretasi lanjut besok/Yg kepikiran masukin ppt

plot(dist.rate.df$S.Size, dist.rate.df$SD_SS, col="red", type="o", pch="o", ylim=c(0,4), xlab="Banyak Sampel", ylab="SD")
points(dist.rate.df$S.Size, dist.rate.df$SD_SBA, col="orange", pch="o")
lines(dist.rate.df$S.Size, dist.rate.df$SD_SBA, col="orange")
points(dist.rate.df$S.Size, dist.rate.df$SD_SBB, col="darkgreen", pch="o")
lines(dist.rate.df$S.Size, dist.rate.df$SD_SBB, col="darkgreen")
points(dist.rate.df$S.Size, dist.rate.df$SD_SBC, col="darkblue", pch="o")
lines(dist.rate.df$S.Size, dist.rate.df$SD_SBC, col="darkblue")
abline(h=1)
legend("topright", legend=c("Simetris", "Campuran B", "Campuran C", "Campuran D"), pch=rep("o",4), col=c("red", "orange", "darkgreen", "darkblue"), title="Legend")

Interpretasi lanjut besok/Yg kepikiran masukin ppt

No. 2

I. Input informasi awal yang diketahui

mean21 <- c(10, 12)
mean22 <- 25
sd21 <- c(2, 3)
sd22 <- 2
corr <- 0.85
n2 <- c(seq(1, 10, 1))
sigma <- matrix(c(sd21[1]^2, sd21[1]*sd21[2]*corr, sd21[2]*sd21[1]*corr, sd21[2]^2), nrow=2, byrow=T)
sigma
##      [,1] [,2]
## [1,]  4.0  5.1
## [2,]  5.1  9.0

II. Pembangkitan data

set.seed(92)
datano2 <- as.data.frame(mvrnorm(n=1000, mu=mean21, Sigma=sigma))
colnames(datano2) <- c("m10", "m12")
head(datano2)
##         m10       m12
## 1  7.211833  7.656556
## 2  9.043640 10.550196
## 3 11.069818 15.159181
## 4 10.203265  6.986419
## 5  5.836438  7.417971
## 6  8.933136  8.628229

III. Dataset dengan pencilan

set.seed(92)
sigma2 <- matrix(c(sd22^2, (sd22^2)*corr, (sd22^2)*corr, sd22^2), nrow=2, byrow=T)
sigma2
##      [,1] [,2]
## [1,]  4.0  3.4
## [2,]  3.4  4.0
datano2n <- datano2[sample(x=nrow(datano2), size=800),]
pencd <- as.data.frame(mvrnorm(n=200, mu=rep(mean22, 2), Sigma=sigma2))
colnames(pencd) <- c("m10", "m12")
datano2n <- rbind(datano2n, pencd)
head(datano2n)
##           m10       m12
## 466  5.230232  5.621267
## 194  5.658041  4.592735
## 823 11.939364 14.875867
## 266  9.675681 11.934186
## 748  9.770855 12.289778
## 380  7.736224  9.075983

IV. Pengambilan sampel dan hitung korelasi

cor.df <- data.frame("Obs"=c("Cor.Expected", "Data1000", "With_Penc200", "Samp10", "Samp20", "Samp30", "Samp40", "Samp50", "Samp60", "Samp70", "Samp80", "Samp90", "Samp100"), "Cor.Pearson"=c(0.85, cor(datano2$m10, datano2$m12, method="pearson"), cor(datano2n$m10, datano2n$m12, method="pearson"), rep(NA, 10)), "Cor.Spearman"=c(0.85, cor(datano2$m10, datano2$m12, method="spearman"), cor(datano2n$m10, datano2n$m12, method="spearman"), rep(NA, 10)))
for(i in n2){
  set.seed(92)
  samp <- datano2[sample(x=nrow(datano2), size=i*10),]
  penc <- as.data.frame(mvrnorm(n=2*i, mu=rep(mean22, 2), Sigma=sigma2))
  colnames(penc) <- c("m10", "m12")
  samp2 <- samp[sample(x=nrow(samp), size=8*i),]
  samp2 <- rbind(samp2, penc)
  cor.df[i+3, 2] <- cor(samp2$m10, samp2$m12, method="pearson")
  cor.df[i+3, 3] <- cor(samp2$m10, samp2$m12, method="spearman")
}
cor.df
##             Obs Cor.Pearson Cor.Spearman
## 1  Cor.Expected   0.8500000    0.8500000
## 2      Data1000   0.8486674    0.8403804
## 3  With_Penc200   0.9599604    0.9190507
## 4        Samp10   0.9793070    0.9878788
## 5        Samp20   0.9788140    0.9233083
## 6        Samp30   0.9827399    0.9092325
## 7        Samp40   0.9773067    0.9283302
## 8        Samp50   0.9747199    0.8741897
## 9        Samp60   0.9758139    0.8928591
## 10       Samp70   0.9754859    0.9018809
## 11       Samp80   0.9658448    0.9095171
## 12       Samp90   0.9738065    0.9128452
## 13      Samp100   0.9730007    0.9082148

Berdasarkan hasil pembangkitan dan sampling data, didapatkan bahwa metode korelasi spearman memberikan hasil yang lebih sesuai dibandingkan dengan metode korelasi pearson.