Anggota
- Muhammad Nachnoer Novatron Fitra Arss (G1401201014)
- Maulana Ahsan Fadillah (G1401201062)
- Reynaldo Mnurung (G1401201084)
Package
lapply(c("purrr","robustbase","ggpubr","hrbrthemes","gridExtra","grid",
"SimDesign","fitdistrplus","lmtest"),
library,character.only=T)[[1]]
## [1] "purrr" "stats" "graphics" "grDevices" "utils" "datasets"
## [7] "methods" "base"
Fungsi Uji Normal
uji.normal<-function(x, object.name="x", graph=TRUE, graph.transformed=TRUE){
lapply(c("fitdistrplus", "kSamples", "rcompanion"), library, character.only=T)
if(any(x<0))x<-x-min(x)+1
mean <- fitdist(x, "norm")$estimate[1]; sd <- fitdist(x, "norm")$estimate[2]
uji<-ks.test(x, "pnorm", mean=mean, sd=sd)
uji1<- ad.test(x, rnorm(length(x), mean=mean, sd=sd))
pvalue<-uji$p.value
PVALUE1<-uji1$ad[1,3]
PVALUE2<-uji1$ad[2,3]
t<-transformTukey(x,quiet = TRUE,plotit = FALSE)
pt<-ks.test(t, "pnorm", mean=fitdist(t,"norm")$estimate[1],
sd=fitdist(t,"norm")$estimate[2])$p.value
lambda<-transformTukey(x,returnLambda =TRUE,quiet=TRUE,plotit = FALSE)
if(graph==TRUE){
if(graph.transformed==FALSE){
par(mfrow=c(1,2))
hist(x, freq=F, col="steelblue", border="white",
main=paste("Histogram of ",object.name),xlab=object.name)
lines(density(x),lwd=2, col="coral")
qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
}
else{
par(mfrow=c(2,2))
hist(x, freq=F, col="steelblue", border="white",
main=paste("Histogram of ",object.name),xlab=object.name)
lines(density(x),lwd=2, col="coral")
hist(t, main=paste("Histogram of ",object.name,"transformed"),
xlab=paste(object.name,"transformed"), freq=F,
col="steelblue",border = "white")
lines(density(t),lwd=2, col="coral")
qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
qqnorm(t, col="coral");qqline(t,col="steelblue", lwd=2)
}
}
z<-ifelse((PVALUE1>=0.05 & PVALUE2<0.05 ||PVALUE1<0.05 & PVALUE2>=0.05),
ifelse(pvalue>=0.05,
return(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal")),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal"))))),
ifelse(((pvalue >= 0.05)&(PVALUE1 >= 0.05||PVALUE2>= 0.05)),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal"),
`Hasil Uji Anderson`=
data.frame(`P-Value`=
rbind(`Versi 1`=PVALUE1, `Versi 2`=PVALUE2),
Keputusan=rep("Terima H0, data menyebar normal", 2)))),
ifelse((pvalue >= 0.05&(PVALUE1 < 0.05||PVALUE2 < 0.05)),
return(list(`Hasil Uji Kolmogorov Smirnov`= data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`=
rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2),
Keputusan= rep("Tolak H0, data tidak menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=
data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal")))),
ifelse(pvalue < 0.05&(PVALUE1 >= 0.05||PVALUE2 >= 0.05),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`= rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2),
Keputusan=rep("Terima H0, data menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal")))),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`=rbind(`Versi 1`=PVALUE1,
`Versi 2`=PVALUE2),
Keputusan=rep("Tolak H0, data tidak menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal"))))))))
return(z)
}
uji.normal1<-function(x, object.name="x", graph=TRUE, graph.transformed=TRUE){
lapply(c("fitdistrplus", "kSamples", "rcompanion"), library, character.only=T)
if(any(x<0))x<-x-min(x)+1
mean <- mean(x); sd <- sd(x)
uji<-ks.test(x, "pnorm", mean=mean, sd=sd)
uji1<- ad.test(x, rnorm(length(x), mean=mean, sd=sd))
pvalue<-uji$p.value
PVALUE1<-uji1$ad[1,3]
PVALUE2<-uji1$ad[2,3]
t<-transformTukey(x,quiet = TRUE,plotit = FALSE)
pt<-ks.test(t, "pnorm", mean=mean(t),
sd=sd(t))$p.value
lambda<-transformTukey(x,returnLambda =TRUE,quiet=TRUE,plotit = FALSE)
if(graph==TRUE){
if(graph.transformed==FALSE){
par(mfrow=c(1,2))
hist(x, freq=F, col="steelblue", border="white",
main=paste("Histogram of ",object.name),xlab=object.name)
lines(density(x),lwd=2, col="coral")
qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
}
else{
par(mfrow=c(2,2))
hist(x, freq=F, col="steelblue", border="white",
main=paste("Histogram of ",object.name),xlab=object.name)
lines(density(x),lwd=2, col="coral")
hist(t, main=paste("Histogram of ",object.name,"transformed"),
xlab=paste(object.name,"transformed"), freq=F,
col="steelblue",border = "white")
lines(density(t),lwd=2, col="coral")
qqnorm(x,col="coral");qqline(x,col="steelblue",lwd=2)
qqnorm(t, col="coral");qqline(t,col="steelblue", lwd=2)
}
}
z<-ifelse((PVALUE1>=0.05 & PVALUE2<0.05 ||PVALUE1<0.05 & PVALUE2>=0.05),
ifelse(pvalue>=0.05,
return(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal")),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal"))))),
ifelse(((pvalue >= 0.05)&(PVALUE1 >= 0.05||PVALUE2>= 0.05)),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal"),
`Hasil Uji Anderson`=
data.frame(`P-Value`=
rbind(`Versi 1`=PVALUE1, `Versi 2`=PVALUE2),
Keputusan=rep("Terima H0, data menyebar normal", 2)))),
ifelse((pvalue >= 0.05&(PVALUE1 < 0.05||PVALUE2 < 0.05)),
return(list(`Hasil Uji Kolmogorov Smirnov`= data.frame(`P-Value`=pvalue,
Keputusan="Terima H0, data menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`=
rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2),
Keputusan= rep("Tolak H0, data tidak menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=
data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal")))),
ifelse(pvalue < 0.05&(PVALUE1 >= 0.05||PVALUE2 >= 0.05),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`= rbind(`Versi 1`=PVALUE1,`Versi 2`=PVALUE2),
Keputusan=rep("Terima H0, data menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal")))),
return(list(`Hasil Uji Kolmogorov Smirnov`=data.frame(`P-Value`=pvalue,
Keputusan="Tolak H0, data tidak menyebar normal"),
`Hasil Uji Anderson`=data.frame(`P-Value`=rbind(`Versi 1`=PVALUE1,
`Versi 2`=PVALUE2),
Keputusan=rep("Tolak H0, data tidak menyebar normal",2)),
`lambda transformasi`=lambda,
`Data Hasil Transformasi Tukey`= t,
`Setelah transformasi~Uji Kolmogorov-Smirnov`=data.frame(`P-Value`=pt,
`Keputusan`=ifelse(pt>=0.05,
"Terima H0, data menyebar normal",
"Tolak H0, data tidak menyebar normal"))))))))
return(z)
}
Nomor 1 dan 2c
n dan N
n<-c(4,12,20,60,100)
N<-1000
Populasi
set.seed(123);c<-c(rchisq(250,4),rchisq(250,12),rnorm(250),rnorm(250,5,0.5));sim<-rchisq(1000,500)
Sampel Simetris
sams<-lapply(n,function(x) data.frame(replicate(1000,sample(sim,x))))
Sampel Campuran
samc<-lapply(n,function(x) data.frame(replicate(1000,sample(c,x))))
names(sams)<-paste("n =",n);names(samc)<-paste("n =",n)
l<-1:length(sams)
Sebaran Sampel
meanc<-lapply(l,function(x)colMeans(samc[[x]]));names(meanc)<-paste("n =",n)
means<-lapply(l,function(x)colMeans(sams[[x]]));names(means)<-paste("n =",n)
Histogram
mc<-map2(meanc,n,function(x,y) gghistogram(x,bins=30,fill="coral",
title=paste("n = ",y,"(","xbar =",round(mean(x),3),")")))
ms<-map2(means,n,function(x,y) gghistogram(x,bins=30,fill="steelblue",
title=paste("n = ",y,"(","xbar =",round(mean(x),3),")")))
G1c<-ggarrange(plotlist = mc)
G1s<-ggarrange(plotlist = ms)
gridExtra::grid.arrange(G1s,
top=textGrob("\nChisq(df=500)\n",
gp=gpar(fontsize=20,font=2)))
gridExtra::grid.arrange(G1c,
top=textGrob("\nChisq(df=500) VS [Chisq(df=4),Chisq(df=12),\nNorm(0,1),Norm(5,0.5)]\n",
gp=gpar(fontsize=20,font=2)))
QQPLOT
set.seed(123)
mcq<-map2(meanc,n,function(x,y) ggqqplot(x,color="coral",
title=paste("n = ",y,"\n(","p-value =",round(uji.normal(x,graph = F)$`Hasil Uji Kolmogorov Smirnov`$P.Value,3),")")))
msq<-map2(means,n,function(x,y) ggqqplot(x,color="steelblue",
title=paste("n = ",y,"\n(","p-value =",round(uji.normal1(x,graph = F)$`Hasil Uji Kolmogorov Smirnov`$P.Value,3),")")))
G1cq<-ggarrange(plotlist = mcq)
G1sq<-ggarrange(plotlist = msq)
gridExtra::grid.arrange(G1sq,
top=textGrob("\nChisq(df=500)\n",
gp=gpar(fontsize=20,font=2)))
gridExtra::grid.arrange(G1cq,
top=textGrob("\n[Chisq(df=4),Chisq(df=12),\nNorm(0,1),Norm(5,0.5)]\n",
gp=gpar(fontsize=20,font=2)))
Nomor 3
Bangkitkan Data
mendefinisikan vektor rataan
set.seed(4)
mu_mvn <- c(10,12)
mendefinisikan matrix korelasi
cor_mat <- matrix(c(1,0.85,0.85,1),nrow=2)
mendefinisikan vektor simpangan baku
sd <- c(2,3)
mengubah matrix korelasi ke kovarians
cov_mat <- MBESS::cor2cov(cor_mat,sd)
membangkitkan data berdistribus multivariate normal
mvn <- rmvnorm(n=100, mean=c(10,12), sigma=cov_mat)
set.seed(4)
membangkitkan data berdasarkan distribusi normal
x1_out <- rnorm(20,25,2)
y1_out <- rnorm(20,25,2)
x1y1_out <-cbind(x1_out,y1_out)
memilih secara acak amatan yang akan diganti dengan pencilan
index_out <- sample.int(nrow(mvn),20)
mengganti amatan dari langkah a dengan amatan dari langkah b
mvn[index_out,] <- x1y1_out
###QQPLOT
qplot(x = V1,y=V2,data = as.data.frame(mvn))
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Parameter Simulasi
mendefinisikan ukuran sampel yang dikaji
n <- seq(10,100,10)
mendefinisikan jumlah ulangan
num_rep2 <- 10000
mendefinisikan vektor simpangan baku
sd0 <- c(2,3)
mendefinisikan matrix korelasi
cor_mat <- matrix(c(1,0.85,0.85,1),nrow=2)
mengubah matrix korelasi ke kovarians
cov_mat <- MBESS::cor2cov(cor_mat,sd0)
mendefinisikan vektor rataan
mu_mvn <- c(10,12)
mendefinisikan mu dan sigma untuk data pencilan
mu1 <- 25
sigma1 <- 2
Fungsi Pembangkitan
generate multivariate normal dengan pencilan
gen_mvn_out <- function(n0,mu0,sigma0,n1,mu1,sigma1){
x_mvn <- rmvnorm(n=n0, mean=mu0, sigma=sigma0)
x1_out <- rnorm(n1,mu1,sigma1)
y1_out <- rnorm(n1,mu1,sigma1)
x1y1_out <-cbind(x1_out,y1_out)
index_out <- sample.int(nrow(x_mvn),n1)
x_mvn[index_out,] <- x1y1_out
return(x_mvn)
}
extract koefisien korelasi
cor_res <- function(x_mat,method){
cor(x_mat[,1],x_mat[,2],method = method)
}
Simulasi
set.seed(4)
membuat objek data.frame dummy untuk menyimpan hasil simulasi
result_sim2 <- data.frame(n=0,correlation=0,type="")
for(i in seq_along(n)){
# membangkitan data berdistribusi multivariate normal dengan ulangan 10rb
sim_mc_mvn <- replicate(num_rep2, gen_mvn_out(n0 = n[i],mu_mvn,cov_mat,n1=0.2*n[i],mu1,sigma1),
simplify = F)
# menghitung keofisien korelasi pearson dari 10rb data hasil bangkitan
pearson_res <- sapply(sim_mc_mvn,cor_res,method="pearson")
# menghitung keofisien spearman pearson dari 10rb data hasil bangkitan
spearman_res <- sapply(sim_mc_mvn,cor_res,method="spearman")
# menghitung rata-rata koefisien
pearson_sim <- mean(pearson_res)
spearman_sim <- mean(spearman_res)
# menyimpan hasil simulasi
result_sim2 <- rbind(result_sim2,list(n[i],pearson_sim,"pearson"),
list(n[i],spearman_sim,"spearman")
)
}
result_sim2 <- result_sim2[-1,]
Grafik Simulasi
ggplot(data = result_sim2,aes(n,correlation))+
geom_point(aes(color=type))+geom_hline(yintercept = 0.85)+scale_y_continuous(limits = c(0.5,1))
Nomor 1a dan 1b
Data campuran 50% normal + 50% chi-square
Definisikan populasi
set.seed(123)
populasi <- c(rnorm(500), rchisq(500, df = 12))
Membuat fungsi
sampling_analysis <- function(n) {
# mengambil sampel acak tanpa pengembalian dari populasi
sample_data <- sample(populasi, n, replace = FALSE)
# menghitung rata-rata dari sampel
sample_mean <- mean(sample_data)
# melakukan uji Kolmogorov-Smirnov
ks_test <- ks.test(sample_data, "pnorm", mean(sample_data), sd(sample_data))
# membuat histogram untuk sampel
hist(sample_data, main = paste0("n = ", n, ", xbar = ", round(sample_mean, 3)), col = "lightblue", breaks = 30)
# membuat qqplot untuk sampel
qqnorm(sample_data, main = paste0("n = ", n, ", p-value = ", round(ks_test$p.value, 5)))
qqline(sample_data)
# mengembalikan nilai rata-rata sampel
return(sample_mean)
}
menampilkan hasil fungsi untuk masing-masing sampel
set.seed(123)
par(mfrow = c(4,2)) # mengatur layout grafik
for (n in c(4, 12, 20, 60, 100)) {
sampling_analysis(n)
}
Data campuran 50% chi-square parameter a + 50% chi-square parameter b
Definisikan populasi
set.seed(12)
populasi <- c(rchisq(500, df = 15), rchisq(500, df = 5))
sampling_analysis <- function(n) {
# mengambil sampel acak tanpa pengembalian dari populasi
sample_data <- sample(populasi, n, replace = FALSE)
# menghitung rata-rata dari sampel
sample_mean <- mean(sample_data)
# melakukan uji Kolmogorov-Smirnov
ks_test <- ks.test(sample_data, "pnorm", mean(sample_data), sd(sample_data))
# membuat histogram untuk sampel
hist(sample_data, main = paste0("n = ", n, ", xbar = ", round(sample_mean, 3)), col = "lightblue", breaks = 30)
# membuat qqplot untuk sampel
qqnorm(sample_data, main = paste0("n = ", n, ", p-value = ", round(ks_test$p.value, 5)))
qqline(sample_data)
# mengembalikan nilai rata-rata sampel
return(sample_mean)
}
Menampilkan hasil fungsi untuk masing-masing sampel
set.seed(123)
par(mfrow = c(4,2)) # mengatur layout grafik
for (n in c(4, 12, 20, 60, 100)) {
sampling_analysis(n)
}
SOAL 1 PEKAN 11
Populasi
n<-c(4,12,20,60,100);N<-1000
set.seed(123);a<- c(rnorm(500,0,1), rchisq(500, df = 1));b<- c(rchisq(500, df = 1), rchisq(500, df = 3))
c<-c(rchisq(250,1),rchisq(250,3),rnorm(250,0,1),rnorm(250,3,1.5))
Sampel
sama<-lapply(n,function(x) data.frame(replicate(1000,sample(a,x))))
samb<-lapply(n,function(x) data.frame(replicate(1000,sample(b,x))))
samc<-lapply(n,function(x) data.frame(replicate(1000,sample(c,x))))
names(samb)<-paste("n =",n);names(samb)<-paste("n =",n);names(samc)<-paste("n =",n)
l<-1:length(samc)
Sebaran Sampel
meana<-lapply(l,function(x)colMeans(sama[[x]]));names(meana)<-paste("n =",n)
meanb<-lapply(l,function(x)colMeans(samb[[x]]));names(meanb)<-paste("n =",n)
meanc<-lapply(l,function(x)colMeans(samc[[x]]));names(meanc)<-paste("n =",n)
Visualisasi
mm51<-map2(meana,n,function(x,y) ggdensity(x, rug = TRUE, fill = "lightgray")+
geom_vline(xintercept = mean(a), color = "red", linetype = "dashed") +
geom_vline(xintercept = t.test(x,mu=mean(a))$conf.int[1], color = "blue", linetype = "dashed") +
geom_vline(xintercept = t.test(x,mu=mean(a))$conf.int[2], color = "blue", linetype = "dashed") +
labs(subtitle = paste("n = ",y,",","p =",
round(t.test(x,mu=mean(a))$p.value,3),",","CI = ",
"[",round(t.test(x,mu=mean(a))$conf.int[1],2),",",
round(t.test(x,mu=mean(a))$conf.int[2],2),"]",",",
"lCI = ",round((t.test(x,mu=mean(a))$conf.int[2]-t.test(x,mu=mean(a))$conf.int[1]),2))))
G51<-ggarrange(plotlist = mm51);G51
mm52<-map2(meanb,n,function(x,y) ggdensity(x, rug = TRUE, fill = "lightgray")+
geom_vline(xintercept = mean(b), color = "red", linetype = "dashed") +
geom_vline(xintercept = t.test(x,mu=mean(b))$conf.int[1], color = "blue", linetype = "dashed") +
geom_vline(xintercept = t.test(x,mu=mean(b))$conf.int[2], color = "blue", linetype = "dashed") +
labs(subtitle =paste("n = ",y,",","p =",
round(t.test(x,mu=mean(b))$p.value,3),",","CI = ",
"[",round(t.test(x,mu=mean(b))$conf.int[1],2),",",
round(t.test(x,mu=mean(b))$conf.int[2],2),"]",",",
"lCI = ",round((t.test(x,mu=mean(b))$conf.int[2]-t.test(x,mu=mean(b))$conf.int[1]),2))))
G52<-ggarrange(plotlist = mm52);G52
mm53<-map2(meanc,n,function(x,y) ggdensity(x, rug = TRUE, fill = "lightgray")+
geom_vline(xintercept = mean(c), color = "red", linetype = "dashed") +
geom_vline(xintercept = t.test(x,mu=mean(c))$conf.int[1], color = "blue", linetype = "dashed") +
geom_vline(xintercept = t.test(x,mu=mean(c))$conf.int[2], color = "blue", linetype = "dashed") +
labs(subtitle =paste("n = ",y,",","p =",
round(t.test(x,mu=mean(c))$p.value,3),",","CI = ",
"[",round(t.test(x,mu=mean(c))$conf.int[1],2),",",
round(t.test(x,mu=mean(c))$conf.int[2],2),"]",",",
"lCI = ",round((t.test(x,mu=mean(c))$conf.int[2]-t.test(x,mu=mean(c))$conf.int[1]),2)) ))
G53<-ggarrange(plotlist = mm53);G53
SOAL 2 PEKAN 11
Tentukan parameter simulasi
set.seed(123) # Atur seed untuk reproduksibilitas
n_groups <- 3 # Jumlah kelompok atau perlakuan
n_samples <- c(10, 50, 100) # Jumlah sampel dalam setiap kelompok
n_simulations <- 1000 # Jumlah simulasi
alpha <- 0.05 # Tingkat signifikansi
A. 50% sebaran normal (N(0,1)) dan 50% sebaran chi-square (χ2(df=1)
Fungsi untuk menghasilkan data simulasi
generate_data <- function(n_groups, n_samples, mean_values, sd_values, violation) {
data <- NULL
for (i in 1:n_groups) {
if (violation) {
data_group <- rchisq(n_samples[i], df = 1) # Menggunakan distribusi chi-square untuk melanggar asumsi sebaran normal
} else {
data_group <- rnorm(n_samples[i], mean_values[i], sd_values[i])
}
data <- rbind(data, data.frame(Group = factor(rep(i, n_samples[i])), Value = data_group))
}
return(data)
}
Fungsi untuk melakukan uji ANOVA dan mengembalikan hasil
perform_anova <- function(data) {
model <- lm(Value ~ Group, data = data)
anova_result <- anova(model)
p_value <- anova_result$`Pr(>F)`[1]
return(p_value)
}
Inisialisasi data frame untuk menyimpan hasil simulasi
results <- data.frame(Sim_Number = integer(),
Sample_Size = integer(),
Violation = logical(),
P_Value = numeric())
Loop simulasi
for (sim in 1:n_simulations) {
for (violation in c(FALSE, TRUE)) {
for (sample_size in n_samples) {
# Tentukan nilai mean dan sd untuk setiap kelompok
mean_values <- c(0, 0, 0) # Misalkan ini adalah mean yang sebenarnya untuk setiap kelompok
sd_values <- c(1, 1, 1) # Misalkan ini adalah standar deviasi yang sebenarnya untuk setiap kelompok
# Bangkitkan data simulasi
data <- generate_data(n_groups, rep(sample_size, n_groups), mean_values, sd_values, violation)
# Lakukan uji ANOVA
p_value <- perform_anova(data)
# Simpan hasil simulasi
results <- rbind(results, data.frame(Sim_Number = sim,
Sample_Size = sample_size,
Violation = violation,
P_Value = p_value))
}
}
}
Plot hasil simulasi
plot1 <- ggplot(results, aes(x = Sample_Size, y = P_Value, color = factor(Violation))) +
geom_point()
plot1
B. 50% sebaran chi-square (χ2(df=1) dan 50% sebaran chi-square (χ2(df=3)
Fungsi untuk menghasilkan data simulasi
set.seed(123)
generate_data <- function(n_groups, n_samples, mean_values, sd_values, violation) {
data <- NULL
for (i in 1:n_groups) {
if (violation) {
data_group <- rchisq(n_samples[i], df = 1) # Menggunakan distribusi chi-square untuk melanggar asumsi sebaran normal
} else {
data_group <- rchisq(n_samples[i], df = 3)
}
data <- rbind(data, data.frame(Group = factor(rep(i, n_samples[i])), Value = data_group))
}
return(data)
}
Fungsi untuk melakukan uji ANOVA dan mengembalikan hasil
perform_anova <- function(data) {
model <- lm(Value ~ Group, data = data)
anova_result <- anova(model)
p_value <- anova_result$`Pr(>F)`[1]
return(p_value)
}
Inisialisasi data frame untuk menyimpan hasil simulasi
results <- data.frame(Sim_Number = integer(),
Sample_Size = integer(),
Violation = logical(),
P_Value = numeric())
Loop simulasi
for (sim in 1:n_simulations) {
for (violation in c(FALSE, TRUE)) {
for (sample_size in n_samples) {
# Tentukan nilai mean dan sd untuk setiap kelompok
mean_values <- c(0, 0, 0) # Misalkan ini adalah mean yang sebenarnya untuk setiap kelompok
sd_values <- c(1, 1, 1) # Misalkan ini adalah standar deviasi yang sebenarnya untuk setiap kelompok
# Bangkitkan data simulasi
data <- generate_data(n_groups, rep(sample_size, n_groups), mean_values, sd_values, violation)
# Lakukan uji ANOVA
p_value <- perform_anova(data)
# Simpan hasil simulasi
results <- rbind(results, data.frame(Sim_Number = sim,
Sample_Size = sample_size,
Violation = violation,
P_Value = p_value))
}
}
}
Plot hasil simulasi
plot2 <- ggplot(results, aes(x = Sample_Size, y = P_Value, color = factor(Violation))) +
geom_point();plot2
SOAL 3 PEKAN 11
1. Membuat dataset
set.seed(123)
n <- 1000
X <- runif(n)
Y <- X^2 + 3 * X + rnorm(n, mean = 0, sd = 1) # x^2 + 3x + c
dataset <- data.frame(X,Y)
2. Melihat dataset
head(dataset)
## X Y
## 1 0.2875775 0.34354054
## 2 0.7883051 1.99264180
## 3 0.4089769 2.42097794
## 4 0.8830174 4.17983325
## 5 0.9404673 2.19671403
## 6 0.0455565 0.04359744
3. Membangun model regresi linier
model <- lm(Y ~ X, data = dataset)
4. Menghitung residual model
residual <- residuals(model)
5. Memeriksa asumsi ragam homogen
a. secara eksplorasi menggunakan plot residual
plot(dataset$X, residual, xlab = "X", ylab = "Residuals")
abline(h = 0, col = "red", lwd = 2)
### b. Uji Kehomogenan Ragam (Breusch-Pagan test)
bptest(model) #ragam menyebar normal
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 0.81837, df = 1, p-value = 0.3657